# Title:        Liquefied petroleum gas (LPG) adoption by Indigenous and marginalized populations in 
#               forested regions of central India
# Code authors: Sarika Khanwilkar, Carlos F. Gould
# Journal:      Energy Research & Social Science
# Contact:      sk4335@columbia.edu


# 0. Libraries ------


library("psych") #functions: corr.test, describe
library("corrplot") #correlation viz
library("doBy") 
library("gmodels")
library("tidyverse")
library("naniar")
library("moments")
library("car")
library("visreg")
library("bbmle")
library("reshape2")
library("broom")
library("readxl")
library("aod")
library("scales")
library("dotwhisker")
library("raster")
library("sf") #spatial
library("arsenal")
library("plyr") #join function
library("lme4")
library("ggplot2")

library("mfx")
library("margins")

#font
library("extrafont")
font_import()

# 1. Data -----
# Note: Household survey and qualitative data are not publicly available.

# 1.1 Household survey data ------

nasa <- read_excel("LPG_householdsurveyanonymous.xlsx")

# 2. Data processing ------

# 2.2 Road distance OSM Feb 2018 ------ 

nasa$road_osm <- nasa$road_dist_km
describe(nasa$road_osm)
hist(nasa$road_osm)
nasa$logroad_osm <- nasa$road_osm + 1
nasa$logroad_osm <- log(nasa$logroad_osm)
nasa$logroad_osm.std <- scale(nasa$logroad_osm, center=T, scale=F)
describe(nasa$logroad_osm.std)
hist(nasa$logroad_osm.std)

# 2.3 Caste dummy -------
par(mfrow=c(1,1))
#q206_caste, 1 forward caste, 2 other backward caste, 3 scheduled caste 4 schedule tribe 5 other
table(nasa$q206_caste)
mean(nasa$q206_caste)
describe(nasa$q206_caste)
#rid of 1 -999
nasa <- nasa[(nasa$q206_caste>-1),]
describe(nasa$q206_caste)
#NEW N 
table(nasa$q206_caste)

#dummy variable for caste
nasa$q206_caste_forward <- ifelse((nasa$q206_caste==1), 1, 0)
nasa$q206_caste_otherbackward <- ifelse((nasa$q206_caste==2), 1, 0)
nasa$q206_caste_scheduledcaste <- ifelse((nasa$q206_caste==3), 1, 0)
nasa$q206_caste_scheduledtribe <- ifelse((nasa$q206_caste==4), 1, 0)

nasa$q206_caste_notforward <- ifelse((nasa$q206_caste!=1), 1,0)

table(nasa$q206_caste_forward)
table(nasa$q206_caste_otherbackward)
table(nasa$q206_caste_scheduledcaste)
table(nasa$q206_caste_scheduledtribe)

# 2.4 Firewood  ------

#------FW days/wk
table(nasa$q502a_rabi_firewood_daysweek)
describe(nasa$q502a_rabi_firewood_daysweek) #N=4996
nasa <- nasa[(nasa$q502a_rabi_firewood_daysweek>-1),]
table(nasa$q502a_rabi_firewood_daysweek)

table(nasa$q502a_monsoon_firewood_daysweek)
describe(nasa$q502a_monsoon_firewood_daysweek) #1 NA N = 4995
nasa <- nasa[(nasa$q502a_monsoon_firewood_daysweek>-1),]
table(nasa$q502a_monsoon_firewood_daysweek)

table(nasa$q502a_summer_firewood_daysweek)
describe(nasa$q502a_summer_firewood_daysweek) #4994 

table(nasa$q502a_winter_firewood_daysweek)
describe(nasa$q502a_winter_firewood_daysweek)

#----use FW for cooking?
table(nasa$q506_firewoodcooking)
table(nasa$q502a_rabi_firewood_yesno)
table(nasa$q502a_monsoon_firewood_yesno)
nasa$q502_firewoodcollectanyseason <- ifelse((nasa$q502a_rabi_firewood_yesno==1 | nasa$q502a_monsoon_firewood_yesno==1 |
                                                nasa$q502a_winter_firewood_yesno==1 | nasa$q502a_summer_firewood_yesno==1), 1, 0)
table(nasa$q502_firewoodcollectanyseason)

#----do you collect FW
nasa$q502a_rabi_firewood_yesno <- ifelse((nasa$q502a_rabi_firewood_daysweek==0), 0, 1)
table(nasa$q502a_rabi_firewood_yesno)
describe(nasa$q502a_rabi_firewood_yesno)

nasa$q502a_monsoon_firewood_yesno <- ifelse((nasa$q502a_monsoon_firewood_daysweek==0), 0, 1)
table(nasa$q502a_monsoon_firewood_yesno)

nasa$q502a_summer_firewood_yesno <- ifelse((nasa$q502a_summer_firewood_daysweek==0), 0, 1)
table(nasa$q502a_summer_firewood_yesno)
nasa$q502a_winter_firewood_yesno <- ifelse((nasa$q502a_winter_firewood_daysweek==0), 0, 1)
table(nasa$q502a_winter_firewood_yesno)

#seasonal firewood patterns

#is fw collection consistent across seasons for 1 household? All seasons
nasa$consistentseasonalfirewood <- ifelse((nasa$q502a_summer_firewood_yesno==1 & nasa$q502a_winter_firewood_yesno==1 & nasa$q502a_rabi_firewood_yesno==1 &nasa$q502a_monsoon_firewood_yesno==1), 1, 0)
table(nasa$consistentseasonalfirewood) #31% (1543) throughout all 4 seasons, 1661 collect fw in monsoon.
#not including monsoon
nasa$consistent3season <- ifelse((nasa$q502a_summer_firewood_yesno==1 & nasa$q502a_winter_firewood_yesno==1 & nasa$q502a_rabi_firewood_yesno==1), 1, 0)
table(nasa$consistent3season) #60% stay consistent throughout 3 seasons

#hhs reporting monsoon fw collection also collected during other seasons
nasamonsoon <- subset(nasa, q502a_monsoon_firewood_yesno==1)
table(nasamonsoon$q502a_winter_firewood_yesno)
table(nasamonsoon$q502a_rabi_firewood_yesno)
table(nasamonsoon$q502a_summer_firewood_yesno)

nasamonsoon$consistent3season <- ifelse((nasamonsoon$q502a_summer_firewood_yesno==1 & nasamonsoon$q502a_winter_firewood_yesno==1 & nasamonsoon$q502a_rabi_firewood_yesno==1), 1, 0)
table(nasamonsoon$consistent3season) #93% who collected in monsoon also collected consistently in other seasons

#FW hrs/day
par(mfrow=c(2,2))
table(nasa$q502b_rabi_firewood_hoursday)
str(nasa$q502b_rabi_firewood_hoursday)
mean(nasa$q502b_rabi_firewood_hoursday)
describe(nasa$q502b_rabi_firewood_hoursday)
nasa[,"q502b_rabi_firewood_hoursday"][is.na(nasa[,"q502b_rabi_firewood_hoursday"])] = 0
describe(nasa$q502b_rabi_firewood_hoursday)
table(nasa$q502b_rabi_firewood_hoursday)

str(nasa$q502b_monsoon_firewood_hoursday)
describe(nasa$q502b_monsoon_firewood_hoursday)
nasa[,"q502b_monsoon_firewood_hoursday"][is.na(nasa[,"q502b_monsoon_firewood_hoursday"])] = 0 
describe(nasa$q502b_monsoon_firewood_hoursday)
table(nasa$q502b_monsoon_firewood_hoursday)

str(nasa$q502b_summer_firewood_hoursday)
describe(nasa$q502b_summer_firewood_hoursday)
nasa[,"q502b_summer_firewood_hoursday"][is.na(nasa[,"q502b_summer_firewood_hoursday"])] = 0 
describe(nasa$q502b_summer_firewood_hoursday)
table(nasa$q502b_summer_firewood_hoursday)

str(nasa$q502b_winter_firewood_hoursday)
nasa[,"q502b_winter_firewood_hoursday"][is.na(nasa[,"q502b_winter_firewood_hoursday"])] = 0 
str(nasa$q502b_winter_firewood_hoursday)
describe(nasa$q502b_winter_firewood_hoursday)
table(nasa$q502b_winter_firewood_hoursday)

# 2.4 Firewood (hrs/wk) variable -------
describe(nasa$q502b_rabi_firewood_hoursday)
str(nasa$q502a_rabi_firewood_daysweek)
nasa[,"q502a_rabi_firewood_daysweek"][is.na(nasa[,"q502a_rabi_firewood_daysweek"])] = 0 
str(nasa$q502a_rabi_firewood_daysweek)
#new variable
nasa$q502a_rabi_firewood_hoursweek <- nasa$q502a_rabi_firewood_daysweek*nasa$q502b_rabi_firewood_hoursday
describe(nasa$q502a_rabi_firewood_hoursweek)

table(nasa$q502b_monsoon_firewood_hoursday)
describe(nasa$q502b_monsoon_firewood_hoursday)

str(nasa$q502a_monsoon_firewood_daysweek)
nasa[,"q502a_monsoon_firewood_daysweek"][is.na(nasa[,"q502a_monsoon_firewood_daysweek"])] = 0 
str(nasa$q502a_monsoon_firewood_daysweek)
#new variable
nasa$q502a_monsoon_firewood_hoursweek <- nasa$q502a_monsoon_firewood_daysweek*nasa$q502b_monsoon_firewood_hoursday
table(nasa$q502a_monsoon_firewood_hoursweek)
describe(nasa$q502a_monsoon_firewood_hoursweek)

table(nasa$q502a_summer_firewood_daysweek)
str(nasa$q502a_summer_firewood_daysweek)
nasa[,"q502a_summer_firewood_daysweek"][is.na(nasa[,"q502a_summer_firewood_daysweek"])] = 0 
str(nasa$q502a_summer_firewood_daysweek)
table(nasa$q502b_summer_firewood_hoursday) #only 1 is 0 hours / day
#new variable 
nasa$q502a_summer_firewood_hoursweek <- nasa$q502a_summer_firewood_daysweek*nasa$q502b_summer_firewood_hoursday
table(nasa$q502a_summer_firewood_hoursweek)
describe(nasa$q502a_summer_firewood_hoursweek)

str(nasa$q502a_winter_firewood_daysweek)
nasa[,"q502a_winter_firewood_daysweek"][is.na(nasa[,"q502a_winter_firewood_daysweek"])] = 0 
str(nasa$q502a_winter_firewood_daysweek)
nasa$q502a_winter_firewood_hoursweek <- nasa$q502a_winter_firewood_daysweek*nasa$q502b_winter_firewood_hoursday
table(nasa$q502a_winter_firewood_hoursweek)
describe(nasa$q502a_winter_firewood_hoursweek)

#log transform hrs/wk for every season + rabi/winter/summer
par(mfrow=c(2,2))
nasa$logq502a_winter_firewood_hoursweek <- nasa$q502a_winter_firewood_hoursweek+1
head(nasa$logq502a_winter_firewood_hoursweek)
head(nasa$q502a_winter_firewood_hoursweek)

nasa$logq502a_winter_firewood_hoursweek <- log(nasa$logq502a_winter_firewood_hoursweek)
head(nasa$logq502a_winter_firewood_hoursweek)

nasa$logq502a_summer_firewood_hoursweek <- nasa$q502a_summer_firewood_hoursweek+1
nasa$logq502a_summer_firewood_hoursweek <- log(nasa$logq502a_summer_firewood_hoursweek)

nasa$logq502a_rabi_firewood_hoursweek <- nasa$q502a_rabi_firewood_hoursweek+1
nasa$logq502a_rabi_firewood_hoursweek <- log(nasa$logq502a_rabi_firewood_hoursweek)

nasa$logq502a_monsoon_firewood_hoursweek <- nasa$q502a_monsoon_firewood_hoursweek+1
nasa$logq502a_monsoon_firewood_hoursweek <- log(nasa$logq502a_monsoon_firewood_hoursweek)

#log and standardize for final outcome variables
nasa$q502a_firewood_hoursweek_3season <- (nasa$q502a_summer_firewood_hoursweek + nasa$q502a_rabi_firewood_hoursweek + nasa$q502a_winter_firewood_hoursweek)/3

describe(nasa$q502a_firewood_hoursweek_3season)
quantile(nasa$q502a_firewood_hoursweek_3season)

nasa$logq502a_firewood_hoursweek_3season <- nasa$q502a_firewood_hoursweek_3season + 1
nasa$logq502a_firewood_hoursweek_3season <- log(nasa$logq502a_firewood_hoursweek_3season)
describe(nasa$logq502a_firewood_hoursweek_3season)
describe(nasa$logq502a_monsoon_firewood_hoursweek)

#standardize
nasa$logq502a_firewood_hoursweek_3season.std <- scale(nasa$logq502a_firewood_hoursweek_3season, center=T, scale=F)
nasa$logq502a_monsoon_firewood_hoursweek.std <- scale(nasa$logq502a_monsoon_firewood_hoursweek, center=T, scale=F)

#average time spent collect fw in hhs that do collect
nasacollect <- subset(nasa, q502a_monsoon_firewood_yesno==1 | q502a_winter_firewood_yesno==1 | q502a_rabi_firewood_yesno==1 |
                        q502a_summer_firewood_yesno==1)


#s 
nasacollect$q502a_firewood_hoursweek_3season <- (nasacollect$q502a_summer_firewood_hoursweek + nasacollect$q502a_rabi_firewood_hoursweek + nasacollect$q502a_winter_firewood_hoursweek)/3

describe(nasacollect$q502a_firewood_hoursweek_3season)
quantile(nasacollect$q502a_firewood_hoursweek_3season)

#nasa fw all season stats

nasacollect$q502a_firewood_hoursweek_allseasons <- (nasacollect$q502a_summer_firewood_hoursweek + nasacollect$q502a_rabi_firewood_hoursweek + nasacollect$q502a_winter_firewood_hoursweek + nasacollect$q502a_monsoon_firewood_hoursweek)/4

table(nasacollect$q201_gender)
table(nasa$q201_gender)

describe(nasacollect$q502a_firewood_hoursweek_allseasons)
quantile(nasacollect$q502a_firewood_hoursweek_allseasons)

# 2.5 Why firewood collection? ------
table(nasa$q502e_winter_whyfor)
table(nasa$q502e_rabi_whyfor)
table(nasa$q502e_summer_whyfor)
table(nasa$q502e_monsoon_whyfor)

# 2.6 Hansen tree cover modification and correlation ------
#1 km
nasa$Hansen1kmog <- nasa$Hansen1km
describe(nasa$Hansen1kmog)
hist(nasa$Hansen1kmog)
nasa$logHansen1km <- nasa$Hansen1km + 1
nasa$logHansen1km <- log(nasa$logHansen1km)
nasa$logHansen1km.std <- scale(nasa$logHansen1km, center=T, scale=F)
describe(nasa$logHansen1km.std)
hist(nasa$logHansen1km.std)
nasa$Hansen1km <- nasa$logHansen1km.std

#2 km
nasa$Hansen2kmog <- nasa$Hansen2km
nasa$logHansen2km <- nasa$Hansen2km + 1
nasa$logHansen2km <- log(nasa$logHansen2km)
nasa$logHansen2km.std <- scale(nasa$logHansen2km, center=T, scale=F)
describe(nasa$logHansen2km.std)
hist(nasa$logHansen2km.std)
nasa$Hansen2km <- nasa$logHansen2km.std

#2.74 
nasa$Hansen2.74kmog <- nasa$Hansen2.74km
nasa$logHansen2.74km <- nasa$Hansen2.74km + 1
nasa$logHansen2.74km <- log(nasa$logHansen2.74km)
nasa$logHansen2.74km.std <- scale(nasa$logHansen2.74km, center=T, scale=F)
describe(nasa$logHansen2.74km.std)
hist(nasa$logHansen2.74km.std)
nasa$Hansen2.74km <- nasa$logHansen2.74km.std

# 3 km
nasa$Hansen3kmog <- nasa$Hansen3km
nasa$logHansen3km <- nasa$Hansen3km + 1
nasa$logHansen3km <- log(nasa$logHansen3km)
nasa$logHansen3km.std <- scale(nasa$logHansen3km, center=T, scale=F)
describe(nasa$logHansen3km.std)
hist(nasa$logHansen3km.std)
nasa$Hansen3km <- nasa$logHansen3km.std

#5 km
nasa$Hansen5kmog <- nasa$Hansen5km
nasa$logHansen5km <- nasa$Hansen5km + 1
nasa$logHansen5km <- log(nasa$logHansen5km)
nasa$logHansen5km.std <- scale(nasa$logHansen5km, center=T, scale=F)
describe(nasa$logHansen5km.std)
hist(nasa$logHansen5km.std)
nasa$Hansen5km <- nasa$logHansen5km.std

#8 km 
nasa$Hansen8kmog <- nasa$Hansen8km
nasa$logHansen8km <- nasa$Hansen8km + 1
nasa$logHansen8km <- log(nasa$logHansen8km)
nasa$logHansen8km.std <- scale(nasa$logHansen8km, center=T, scale=F)
describe(nasa$logHansen8km.std)
hist(nasa$logHansen8km.std)
nasa$Hansen8km <- nasa$logHansen8km.std

#10 
nasa$Hansen10kmog <- nasa$Hansen10km
nasa$logHansen10km <- nasa$Hansen10km + 1
nasa$logHansen10km <- log(nasa$logHansen10km)
nasa$logHansen10km.std <- scale(nasa$logHansen10km, center=T, scale=F)
describe(nasa$logHansen10km.std)
hist(nasa$logHansen10km.std)
nasa$Hansen10km <- nasa$logHansen10km.std

cor <- cbind(nasa$Hansen1km, nasa$Hansen2km, nasa$Hansen2.74km, nasa$Hansen3km, nasa$Hansen5km, nasa$Hansen8km, nasa$Hansen10km)
colnames(cor) <- c("1km", "2km", "2.74km", "3km", "5km", "8km", "10km")
c2 <- round(cor(cor),2)
c2 #all highly correlated of course - 1 and 10 km most non-correlated

describe(nasa$Hansen1kmog) #smallest mean and smallest range of other buffer distances
describe(nasa$Hansen2kmog)
describe(nasa$Hansen2.74kmog)
describe(nasa$Hansen3kmog)
describe(nasa$Hansen5kmog)
describe(nasa$Hansen8kmog)
describe(nasa$Hansen10kmog)

#2.7 Perception: hh collect more of less fw 5 yrs compare (+dummy) -----
#compared to 5 yrs ago, does household collect more or less firewood? 1 much more 2 somehwat more 3 same amount 4 somewhat less 5 much less
table(nasa$q502d_rabi_firewoodcompare)
describe(nasa$q502d_rabi_firewoodcompare)
str(nasa$q502d_rabi_firewoodcompare)
nasa$q502d_rabi_firewoodcompare[is.na(nasa$q502d_rabi_firewoodcompare)] <- 0 
str(nasa$q502d_rabi_firewoodcompare)

nasa$q502d_rabi_woodmuchmore <- ifelse((nasa$q502d_rabi_firewoodcompare==1), 1, 0)
nasa$q502d_rabi_woodmore <- ifelse((nasa$q502d_rabi_firewoodcompare==2), 1, 0)
nasa$q502d_rabi_woodsame <- ifelse((nasa$q502d_rabi_firewoodcompare==3), 1, 0)
nasa$q502d_rabi_woodless <- ifelse((nasa$q502d_rabi_firewoodcompare==4), 1, 0)
nasa$q502d_rabi_woodmuchless <- ifelse((nasa$q502d_rabi_firewoodcompare==5), 1, 0)

#collapse to three cat.
nasa$q502d_rabi_woodmoremore <- ifelse((nasa$q502d_rabi_firewoodcompare==1 | nasa$q502d_rabi_firewoodcompare==2), 1, 0)
table(nasa$q502d_rabi_woodmoremore)

table(nasa$q502d_rabi_woodsame)

nasa$q502d_rabi_woodlessless <- ifelse((nasa$q502d_rabi_firewoodcompare==5 | nasa$q502d_rabi_firewoodcompare==4), 1, 0)
table(nasa$q502d_rabi_woodlessless)

table(nasa$q502d_monsoon_firewoodcompare)
describe(nasa$q502d_monsoon_firewoodcompare)

str(nasa$q502d_monsoon_firewoodcompare)
nasa$q502d_monsoon_firewoodcompare[is.na(nasa$q502d_monsoon_firewoodcompare)] <- 0 
str(nasa$q502d_monsoon_firewoodcompare)

#dummy
nasa$q502d_monsoon_woodmuchmore <- ifelse((nasa$q502d_monsoon_firewoodcompare==1), 1, 0)
nasa$q502d_monsoon_woodmore <- ifelse((nasa$q502d_monsoon_firewoodcompare==2), 1, 0)
nasa$q502d_monsoon_woodsame <- ifelse((nasa$q502d_monsoon_firewoodcompare==3), 1, 0)
nasa$q502d_monsoon_woodless <- ifelse((nasa$q502d_monsoon_firewoodcompare==4), 1, 0)
nasa$q502d_monsoon_woodmuchless <- ifelse((nasa$q502d_monsoon_firewoodcompare==5), 1, 0)

#collapose to 3 cat.
nasa$q502d_monsoon_woodmoremore <- ifelse((nasa$q502d_monsoon_firewoodcompare==1 | nasa$q502d_monsoon_firewoodcompare==2), 1, 0)
table(nasa$q502d_monsoon_woodmoremore)
table(nasa$q502d_monsoon_woodsame)
nasa$q502d_monsoon_woodlessless <- ifelse((nasa$q502d_monsoon_firewoodcompare==5 | nasa$q502d_monsoon_firewoodcompare==4), 1, 0)
table(nasa$q502d_monsoon_woodlessless)
table(nasa$q502d_summer_firewoodcompare)
describe(nasa$q502d_summer_firewoodcompare)
str(nasa$q502d_summer_firewoodcompare)
nasa$q502d_summer_firewoodcompare[is.na(nasa$q502d_summer_firewoodcompare)] <- 0 
str(nasa$q502d_summer_firewoodcompare)

#dummy
nasa$q502d_summer_woodmuchmore <- ifelse((nasa$q502d_summer_firewoodcompare==1), 1, 0)
nasa$q502d_summer_woodmore <- ifelse((nasa$q502d_summer_firewoodcompare==2), 1, 0)
nasa$q502d_summer_woodsame <- ifelse((nasa$q502d_summer_firewoodcompare==3), 1, 0)
nasa$q502d_summer_woodless <- ifelse((nasa$q502d_summer_firewoodcompare==4), 1, 0)
nasa$q502d_summer_woodmuchless <- ifelse((nasa$q502d_summer_firewoodcompare==5), 1, 0)

#collapose to 3 cat.
nasa$q502d_summer_woodmoremore <- ifelse((nasa$q502d_summer_firewoodcompare==1 | nasa$q502d_summer_firewoodcompare==2), 1, 0)
table(nasa$q502d_summer_woodmoremore)
table(nasa$q502d_summer_woodsame)
nasa$q502d_summer_woodlessless <- ifelse((nasa$q502d_summer_firewoodcompare==5 | nasa$q502d_summer_firewoodcompare==4), 1, 0)
table(nasa$q502d_summer_woodlessless)
table(nasa$q502d_winter_firewoodcompare)
describe(nasa$q502d_winter_firewoodcompare)
str(nasa$q502d_winter_firewoodcompare)
nasa$q502d_winter_firewoodcompare[is.na(nasa$q502d_winter_firewoodcompare)] <- 0 
str(nasa$q502d_winter_firewoodcompare)

#dummy
nasa$q502d_winter_woodmuchmore <- ifelse((nasa$q502d_winter_firewoodcompare==1), 1, 0)
nasa$q502d_winter_woodmore <- ifelse((nasa$q502d_winter_firewoodcompare==2), 1, 0)
nasa$q502d_winter_woodsame <- ifelse((nasa$q502d_winter_firewoodcompare==3), 1, 0)
nasa$q502d_winter_woodless <- ifelse((nasa$q502d_winter_firewoodcompare==4), 1, 0)
nasa$q502d_winter_woodmuchless <- ifelse((nasa$q502d_winter_firewoodcompare==5), 1, 0)

#collapose to 3 cat.
nasa$q502d_winter_woodmoremore <- ifelse((nasa$q502d_winter_firewoodcompare==1 | nasa$q502d_winter_firewoodcompare==2), 1, 0)
table(nasa$q502d_winter_woodmoremore)
table(nasa$q502d_winter_woodsame)
nasa$q502d_winter_woodlessless <- ifelse((nasa$q502d_winter_firewoodcompare==5 | nasa$q502d_winter_firewoodcompare==4), 1, 0)
table(nasa$q502d_winter_woodlessless)

# 2.7 Changing difficuly in fw collection effort last 5 yrs (+dummy)-----
#in last 5 years, has it become easier or harder to collected firewood? 1 much arder 2 somewhat harder 3 stayed constant 4 somewhat easier 5 much easier , increasing in easier
par(mfrow=c(1,1))
table(nasa$q512_firewoodcollectioncompare)
str(nasa$q512_firewoodcollectioncompare)
describe(nasa$q512_firewoodcollectioncompare)

#dummy
nasa$q512_firewoodcollectioncompare_muchharder <- ifelse((nasa$q512_firewoodcollectioncompare==1), 1, 0)
nasa$q512_firewoodcollectioncompare_hard <- ifelse((nasa$q512_firewoodcollectioncompare==2), 1, 0)
nasa$q512_firewoodcollectioncompare_constant <- ifelse((nasa$q512_firewoodcollectioncompare==3), 1, 0)
nasa$q512_firewoodcollectioncompare_easy <- ifelse((nasa$q512_firewoodcollectioncompare==4), 1, 0)
nasa$q512_firewoodcollectioncompare_mucheasier <- ifelse((nasa$q512_firewoodcollectioncompare==5), 1, 0)

#create three cats.
nasa$q512_firewoodcollectioncompare_hardhard <- ifelse((nasa$q512_firewoodcollectioncompare==1 | nasa$q512_firewoodcollectioncompare==2), 1, 0)
table(nasa$q512_firewoodcollectioncompare_hardhard)

table(nasa$q512_firewoodcollectioncompare_constant)

nasa$q512_firewoodcollectioncompare_easyeasy <- ifelse((nasa$q512_firewoodcollectioncompare==4 | nasa$q512_firewoodcollectioncompare==5), 1, 0)
table(nasa$q512_firewoodcollectioncompare_easyeasy)

#revise order: much harder is 5 and much easier is 1  
nasa$q512_firewoodcollectioncompare.no <- 6-nasa$q512_firewoodcollectioncompare
table(nasa$q512_firewoodcollectioncompare.no)
describe(nasa$q512_firewoodcollectioncompare.no)

#logit models to see extreme perceptions 

#3 45 v 123, 123 = 0, 45 = 1
nasa$q512_firewoodcollectioncompare.no_123 <- with(nasa, q512_firewoodcollectioncompare.no==1 | q512_firewoodcollectioncompare.no==2 | q512_firewoodcollectioncompare.no ==3)
table(nasa$q512_firewoodcollectioncompare.no_123)
nasa$q512_firewoodcollectioncompare.no_123 <- ifelse((nasa$q512_firewoodcollectioncompare.no_123 == "FALSE"),1,0)
table(nasa$q512_firewoodcollectioncompare.no_123) #final variable for models

# 2.8 Qualitative data: perception of fw difficulty ------

table(nasa$q512_firewoodcollection_why)
look.for <- c("LPG_27", "LPG.Buyingwood_28", "LPG.Restriction_29") # 9 ppl LPG_27

look.for2 <- c("LPG.Restriction_29", "Restriction_1", "Restriction.Buyingwood_33", 
               "Restriction.Effort_34", "Restriction.Fire_31", "Restriction.lackofforest_30", 
               "Restriction.Lackofforest_30", "Restriction.Ownfield_32", "Restriction.Wildanimals_35", 
               "Fire.Restriction_11", "Faraway.Restriction_18") 

qual.nasarestrict <- nasa[nasa$q512_firewoodcollection_why %in% look.for2, ] #2591 observations

qual.nasa1 <-nasa[nasa$q512_firewoodcollection_why %in% look.for, ]

qual.nasa2 <- nasa[nasa$q512_firewoodcollection_why == "LPG.Restriction_29", ] #3 ppl

qual.nasa3 <- nasa[nasa$q512_firewoodcollection_why == "LPG.Buyingwood_28", ] #1 person

qual.nasa4 <- nasa[nasa$q512_firewoodcollection_why == "Lackofforest_25", ] #1161 ppl 

look.for3 <- c( "Restriction.lackofforest_30", 
                "Restriction.Lackofforest_30", "Lackofforest_25", "Faraway.Lackofforest_20",
                "Lackofforest.Ownfield.Buyingwood_38", "Lackofforest.Wildanimals_26",
                "Agriculturalexpansion.Lackofforest_41", "Norestriction.lackofforest_3",
                "Norestriction.Lackofforest_8", "Fire.Lackofforest_12") 

qual.lackofforest <- nasa[nasa$q512_firewoodcollection_why %in% look.for3, ] #1405 ppl

table(qual.nasa4$q207_edu_noformal)
table(qual.nasa4$q206_caste_scheduledtribe)
describe(qual.nasa4$q605_montlyexp) #mean=3769.2, mean for entire sample = 3782.8 but does not include maximum spending amts
#other categories that include lack of forest + lack of forest
look.for1 <- c("Lackofforest_25", "Norestriction.Lackofforest_8", "Norestriction.lackofforest_3", "Fire.Lackofforest_12", "Faraway.Lackofforest_20", "Lackofforest.Wildanimals_26", "Restriction.lackofforest_30" ,"Lackofforest.Ownfield.Buyingwood_38") 
qual.nasa5 <-nasa[nasa$q512_firewoodcollection_why %in% look.for1, ] #1295 ppl #26.52% of ppl mentioned lack of forest

# 2.9 T Test: tree cover and perception of fw difficulty-------
ttest <- t.test(Hansen2.74kmog ~ q512_firewoodcollectioncompare.no_123, data=nasa)
ttest 
ttest1 <- t.test(Hansen2.74km ~ q512_firewoodcollectioncompare.no_123, data=nasa)
ttest1
#hard mean is higher than mean in 123
nasahard <- subset(nasa, q512_firewoodcollectioncompare.no_123 == 1, select = c("Hansen3kmog", "Hansen2.74kmog", "Prot_Near"))
nasanohard <- subset(nasa, q512_firewoodcollectioncompare.no_123 == 0, select = c("Hansen3kmog", "Hansen2.74kmog", "Prot_Near"))
describe(nasahard$Hansen2.74kmog) #mean and median % tree cover a little higher where people reported harder firewood collection
quantile(nasahard$Hansen2.74kmog)
describe(nasanohard$Hansen2.74kmog)
quantile(nasanohard$Hansen2.74kmog)

# 2.10 T Test: fw difficuly and distance to PA------
ttest2 <- t.test(Prot_Near ~ q512_firewoodcollectioncompare.no_123, data=nasa)
ttest2

describe(nasanohard$Prot_Near)
describe(nasahard$Prot_Near)
  
# 2.11 LPG for cooking-----
describe(nasa$q507_lpgcooking)
table(nasa$q507_lpgcooking)

# 2.12 LPG ownership and fw collection -----
#subset only LPG OWNERS
nasalpgown <- subset(nasa, nasa$q507_lpgcooking==1)

table(nasalpgown$q502a_rabi_firewood_yesno)
table(nasalpgown$q502a_winter_firewood_yesno)
table(nasalpgown$q502a_summer_firewood_yesno)
table(nasalpgown$q502a_monsoon_firewood_yesno)

nasalpgown$lpgandnofw_3season <- ifelse((nasalpgown$q502a_rabi_firewood_yesno==1 | nasalpgown$q502a_winter_firewood_yesno==1 |
                                           nasalpgown$q502a_summer_firewood_yesno==1 | nasalpgown$q502a_monsoon_firewood_yesno==1), 1, 0)
table(nasalpgown$lpgandnofw_3season) #68% of LPG owners collected fw in at least 1 season of the year
table(nasalpgown$q506_firewoodcooking) #90% of LPG owners continued to cook with firewood

# 2.13 year of LPG ownership (+dummy)------
#when did you start using LPG? 1 before 2013 2 2103 3 2014 4 2015 5 2016 6 2017, need to add a never category
par(mfrow=c(1,2))
#q507a_lpg_when
table(nasa$q507a_lpg_when)
describe(nasa$q507a_lpg_when)

lpgwhen <- nasa$q507a_lpg_when
lpgwhen[is.na(lpgwhen)] <- 0  #change na to zero because these hh don't own LPG
lpgwhen

nasa <- cbind(lpgwhen,nasa)
nasa$lpgwhen

describe(nasa$lpgwhen)
table(nasa$lpgwhen)

# nasa$q507a_lpg_when : 1 before 2013 2 2103 3 2014 4 2015 5 2016 6 2017
table(nasa$q507a_lpg_when)
nasa$lpgwhen.yrsince <- 7-nasa$q507a_lpg_when
table(nasa$lpgwhen.yrsince)
# nasa$lpgwhen : 0 never 1 before 2013 2 2103 3 2014 4 2015 5 2016 6 2017
nasa$lpgwhen.yrsince[is.na(nasa$lpgwhen.yrsince)] <- 0
table(nasa$lpgwhen.yrsince)

nasa$loglpgwhen.yrsince <- log(nasa$lpgwhen.yrsince +1)
table(nasa$loglpgwhen.yrsince)

summaryBy(q502a_firewood_hoursweek_3season~lpgwhen.yrsince, data=nasa, FUN=c(mean,sd)) #summaryBy function

#change in LPG ownership
par(mfrow=c(1,3))
#Dummy code for change in LPG ownership, never to never = nasa$q507a_lpg_never, before 2013 to LPG = nasa$q507a_lpg_before2013, 2013 or after LPG = nasa$q507a_lpg_2013orafter
nasa$q507a_lpg_never <- ifelse((nasa$q507_lpgcooking==0), 1, 0)
table(nasa$q507a_lpg_never)

#lpg_20142015 , lpg_20162017 , lpg_2013orbefore
nasa$lpg_20142015 <- ifelse((nasa$lpgwhen==3 | nasa$lpgwhen==4), 1, 0)
table(nasa$lpg_20142015)

nasa$lpg_20162017 <- ifelse((nasa$lpgwhen==5 | nasa$lpgwhen==6), 1, 0)
table(nasa$lpg_20162017)

nasa$lpg_2013orbefore <- ifelse((nasa$lpgwhen==1 | nasa$lpgwhen==2), 1, 0)
table(nasa$lpg_2013orbefore)

nasa$lpg_2017 <- ifelse((nasa$lpgwhen==6), 1, 0)
table(nasa$lpg_2017)

# 2.14 Protected Area distance -----
nasa$Prot_Near_new <- nasa$Prot_Near/1000
describe(nasa$Prot_Near_new)
hist(nasa$Prot_Near_new)
par(mfrow=c(1,1))

# 2.15 PPL in village-----
table(nasa$q200_people)
nasa$logq200_people <- log(nasa$q200_people)
head(nasa$logq200_people)

nasa$logq200_people.std <- scale(nasa$logq200_people, center=T, scale=F)
head(nasa$logq200_people.std)

# 2.16 HH in village -----
table(nasa$q200_hh)
nasa$logq200_hh <- log(nasa$q200_hh)
head(nasa$logq200_hh)

nasa$logq200_hh.std <- scale(nasa$logq200_hh, center=T, scale=F)
head(nasa$logq200_hh.std)

# 2.17 PPL in hh -----
table(nasa$q210_peoplenumhh)
mean(nasa$q210_peoplenumhh)
describe(nasa$q210_peoplenumhh)

nasa$logq210_peoplenumhh <- log(nasa$q210_peoplenumhh)
head(nasa$logq210_peoplenumhh)
nasa$logq210_peoplenumhh.std <- scale(nasa$logq210_peoplenumhh, center=T, scale=F)
head(nasa$logq210_peoplenumhh.std)

describe(nasa$logq210_peoplenumhh.std)
hist(nasa$logq210_peoplenumhh.std)
nasa$logq210_peoplenumhh.std.bi <- ifelse((nasa$logq210_peoplenumhh.std>0.01), 1, 0)
table(nasa$logq210_peoplenumhh.std.bi)

# 2.18 Gender and Hhhead-----
table(nasa$q201_gender) #1 = female
nasa$male <- ifelse((nasa$q201_gender == 1), 0, 1)
table(nasa$male)
table(nasa$q200_hhhead)

nasa$hhheadfemale <- ifelse((nasa$q200_hhhead & nasa$q201_gender == 1), 1,0)
table(nasa$hhheadfemale) #out of all surveyed, only 518 (38%) of female respondants were hhheads, 10% of full sample

nasa$hhheadmale <- ifelse((nasa$male & nasa$q200_hhhead == 1), 1,0)
table(nasa$hhheadmale) #1's: 3029

table(nasa$q200a_relationhh)

nasa2014 <- subset(nasa, lpg_20142015 == 1)
table(nasa$lpg_20142015)
table(nasa2014$hhheadfemale)

table(nasa$lpg_20162017)
nasa2016 <- subset(nasa, lpg_20162017 == 1)
table(nasa2016$hhheadfemale)

table(nasa$lpg_2013orbefore)
nasa2013 <- subset(nasa, lpg_2013orbefore == 1)
table(nasa2013$hhheadfemale)

table(nasa$lpg_2017)
nasa2017 <- subset(nasa, lpg_2017 == 1)
table(nasa2017$hhheadfemale)

table(nasa$hhheadfemale)
#make gender 1 = female hh
nasa$q201_gender <- ifelse((nasa$q200_hhhead & nasa$q201_gender == 1), 1,0)
table(nasa$q201_gender)

# 2.19 Age -----
table(nasa$q202_age)
describe(nasa$q202_age)
hist(nasa$q202_age, breaks=20)

nasa$logq202_age <- log(nasa$q202_age)
head(nasa$logq202_age)
nasa$logq202_age.std <- scale(nasa$logq202_age, center=T, scale=F)
head(nasa$logq202_age.std)
hist(nasa$logq202_age.std)

describe(nasa$logq202_age.std)
nasa$logq202_age.std.bi <- ifelse((nasa$logq202_age.std>0.03), 1, 0)

table(nasa$logq202_age.std.bi)

# 2.22 Education level (+dummy)-----
#q207_levelschooling 1 no formal education, 2 primary, 3 secondary, 4 high school 5 intermediate 6 graduate/post graduate 8 other
table(nasa$q207_levelschooling)
mean(nasa$q207_levelschooling)
describe(nasa$q207_levelschooling)

#dummy for levelschooling
nasa$q207_edu_noformal <- ifelse((nasa$q207_levelschooling==1), 1, 0)
nasa$q207_edu_primary <- ifelse((nasa$q207_levelschooling==2), 1, 0)
nasa$q207_edu_secondary <- ifelse((nasa$q207_levelschooling==3), 1, 0)
nasa$q207_edu_hs <- ifelse((nasa$q207_levelschooling==4), 1, 0)
nasa$q207_edu_intermediate <- ifelse((nasa$q207_levelschooling==5), 1, 0)
nasa$q207_edu_gradps <- ifelse((nasa$q207_levelschooling==6), 1, 0)

table(nasa$q207_edu_noformal)
table(nasa$q207_edu_primary)
table(nasa$q207_edu_secondary)
table(nasa$q207_edu_hs)
table(nasa$q207_edu_intermediate)
table(nasa$q207_edu_gradps)

# 2.23 Religion (+dummy)-----
table(nasa$q205_religion)
nasa$q205_religion_hindu <- ifelse((nasa$q205_religion==1), 1, 0)
nasa$q205_religion_muslim <- ifelse((nasa$q205_religion==2), 1, 0)
nasa$q205_religion_otherreligion <- ifelse((nasa$q205_religion==3), 1, 0)

nasa$q205_religion_nothindi <- ifelse((nasa$q205_religion!=1), 1, 0)

# 2.24 Income and expenditure-----
describe(nasa$q605_montlyexp) #n=4994, mean=3783.78, median = 3000
quantile(nasa$q605_montlyexp)
table(nasa$q605_montlyexp) #assign -999 = 3783.78
nasa$q605_montlyexp[nasa$q605_montlyexp == -999] <- 3783.78
table(nasa$q605_montlyexp)
describe(nasa$q605_montlyexp) #1% of 75000 is 750, mean = 3784.79

nasa$montexp_usd <- nasa$q605_montlyexp/70
describe(nasa$montexp_usd)
#percentile 
nasa$montlypercent <- percent_rank(nasa$q605_montlyexp)
table(nasa$montlypercent)
hist(nasa$montlypercent)

nasamontmed <- subset(nasa, q605_montlyexp == 3000, select = c("q502a_firewood_hoursweek_3season"))
nasamonthigh <- subset(nasa, q605_montlyexp == 5000, select = c("q502a_firewood_hoursweek_3season"))
describe(nasamontmed$q502a_firewood_hoursweek_3season) 
describe(nasamonthigh$q502a_firewood_hoursweek_3season)

#percentile for monthly expentirue
nasa$montlypercent <- percent_rank(nasa$q605_montlyexp)
table(nasa$montlypercent)
hist(nasa$montlypercent)

hist(nasa$q605_montlyexp, freq=F, breaks=30, xlab="Amount in INR", ylab="Probability", main="Expenditure")
nasa$logq605_montlyexp <- nasa$q605_montlyexp +1
nasa$logq605_montlyexp <- log(nasa$logq605_montlyexp)
head(nasa$logq605_montlyexp)
nasa$logq605_montlyexp.std <- scale(nasa$logq605_montlyexp, center=T, scale=F)
head(nasa$logq605_montlyexp.std)
hist(nasa$logq605_montlyexp.std)
describe(nasa$logq605_montlyexp.std)

describe(nasa$logq605_montlyexp)

describe(nasa$q605_montlyexp)
hist(nasa$q605_montlyexp)
# 2.25 Bank account and savings
#make new variables: if you have amt. in a bank acct. and if you have amt in savings
table(nasa$q607_bankacct)
describe(nasa$q607_bankacct)

#if you have a bank acct and its greater than 0
nasa$q608_bankamount.bi <- ifelse((nasa$q607_bankacct==1 & nasa$q608_bankamount>0), 1, 0)
table(nasa$q608_bankamount.bi)
describe(nasa$q608_bankamount.bi)

nasabank <-  subset(nasa, q608_bankamount.bi==1)

table(nasabank$lpg_2013orbefore)
table(nasabank$lpg_20142015)
table(nasabank$lpg_20162017)
table(nasabank$q507_lpgcooking)
#if you have a saving acct and its greater than 0
nasa$savingamountamt <- ifelse((nasa$q609_savingamount>0), 1, 0)
table(nasa$savingamountamt) 

nasa$q609_savingamount.bi <- ifelse((nasa$q607_bankacct==1 & nasa$q609_savingamount>0), 1, 0)
table(nasa$q609_savingamount.bi)
describe(nasa$q609_savingamount.bi)
table(nasa$q609_savingamount.bi, nasa$q608_bankamount.bi)

# 3. Variable exploration and modification-----

# 3.1 Education: 4 categories----- 
#q207_levelschooling 1 no formal education, 2 primary, 3 secondary, 4 high school 5 intermediate 6 graduate/post graduate 8 other
a1 <- aov(nasa$logq502a_summer_firewood_hoursweek ~ as.factor(nasa$q207_levelschooling))
summary(a1)
TukeyHSD(a1, conf.level=.95)
#summer, 1 = no formal, 2 = primary + secondary, 3 = hs and above
a3 <- aov(nasa$logq502a_monsoon_firewood_hoursweek ~ as.factor(nasa$q207_levelschooling))
summary(a3)
TukeyHSD(a3, conf.level=.95)
#monsoon, model edu categories: 1 = no formal, 2 = formal
a4 <- aov(nasa$logq502a_winter_firewood_hoursweek ~ as.factor(nasa$q207_levelschooling))
summary(a4)
TukeyHSD(a4, conf.level=.95)
#winter, 1 = no formal, 2 = primary + secondary, 3 = hs, 4 = intermediate and above
a5 <- aov(nasa$logq502a_rabi_firewood_hoursweek ~ as.factor(nasa$q207_levelschooling))
summary(a5)
TukeyHSD(a5, conf.level=.95)
#rabi, 1 = no formal, 2 = primary + secondary, 3 = hs and above

boxplot(nasa$lpgwhen ~ nasa$q207_levelschooling)
aaa1 <- aov(nasa$lpgwhen ~ as.factor(nasa$q207_levelschooling))
summary(aaa1)
TukeyHSD(aaa1, conf.level=.95)
#only sig diff. between 1 and 2
#FINAL: 1 = no formal, 2 = primary + secondary, 3 = hs, 4 = intermediate and above

table(nasa$q207_edu_noformal)

nasa$q207_edu_primarysecondary <- ifelse((nasa$q207_levelschooling==2 | nasa$q207_levelschooling==3), 1, 0)
table(nasa$q207_edu_primarysecondary)

table(nasa$q207_edu_hs)

nasa$q207_edu_intermediateabove <- ifelse((nasa$q207_levelschooling==5 | nasa$q207_levelschooling==6), 1, 0)
table(nasa$q207_edu_intermediateabove)

#SUBSETS
# 3.2 Statewise Table 2 information -----
table(nasa$state_anonymous)
head(nasa$state_anonymous)

state1 <- subset(nasa, state_anonymous=="1")
state2 <- subset(nasa, state_anonymous=="2")
state3 <- subset(nasa, state_anonymous=="3")
describe(state1$q507_lpgcooking) #N= 51% % LPG for cooking
describe(state2$q507_lpgcooking) #43 % LPG for cooking
describe(state3$q507_lpgcooking) #49% % LPG for cooking

# 3.3 Pre vs. post 2016 LPG adoption comparison-----

nasa_nolpg <- subset(nasa, nasa$q507_lpgcooking==0)
table(nasa_nolpg$q201_gender)

describe(nasa_nolpg$montexp_usd)

nasa_lpg <- subset(nasa, nasa$q507_lpgcooking==1) #2276
nasa_lpg$lpg2015orbefore <- ifelse((nasa_lpg$lpg_2013orbefore==1 | nasa_lpg$lpg_20142015==1), 1, 0)
describe(nasa_lpg$lpg2015orbefore) #24%
describe(nasa_lpg$lpg_20162017) #76%

nasa_lpg2015before <- subset(nasa_lpg, nasa_lpg$lpg2015orbefore==1) #547
nasa_lpg20162017 <- subset(nasa_lpg, nasa_lpg$lpg_20162017==1) #1729

t.test(nasa_lpg$q206_caste_scheduledcaste ~ nasa_lpg$lpg_20162017) #no sig p
wilcox.test(nasa_lpg$q206_caste_scheduledcaste ~ nasa_lpg$lpg_20162017) #no sig p
describe(nasa_lpg2015before$q206_caste_scheduledcaste) #avg .15
describe(nasa_lpg20162017$q206_caste_scheduledcaste) #avg .13

t.test(nasa_lpg$q206_caste_scheduledtribe ~ nasa_lpg$lpg_20162017) #sig p!
wilcox.test(nasa_lpg$q206_caste_scheduledtribe ~ nasa_lpg$lpg_20162017)#sig p!
describe(nasa_lpg2015before$q206_caste_scheduledtribe) #avg .35
describe(nasa_lpg20162017$q206_caste_scheduledtribe) #avg .57

t.test(nasa_lpg$q206_caste_forward ~ nasa_lpg$lpg_20162017)#sig p!
wilcox.test(nasa_lpg$q206_caste_forward ~ nasa_lpg$lpg_20162017)#sig p!
describe(nasa_lpg2015before$q206_caste_forward) #avg .11
describe(nasa_lpg20162017$q206_caste_forward) #avg .03

t.test(nasa_lpg$q206_caste_otherbackward ~ nasa_lpg$lpg_20162017) #sig p!
wilcox.test(nasa_lpg$q206_caste_otherbackward ~ nasa_lpg$lpg_20162017) #sig p!
describe(nasa_lpg2015before$q206_caste_otherbackward) #avg .39
describe(nasa_lpg20162017$q206_caste_otherbackward) #avg .27


t.test(nasa_lpg$q512_firewoodcollectioncompare.no_123 ~ nasa_lpg$lpg_20162017) #sig p!
wilcox.test(nasa_lpg$q512_firewoodcollectioncompare.no_123 ~ nasa_lpg$lpg_20162017) #sig p!
describe(nasa_lpg2015before$q512_firewoodcollectioncompare.no_123) #avg .75
describe(nasa_lpg20162017$q512_firewoodcollectioncompare.no_123) # avg .9


t.test(nasa_lpg$q207_edu_noformal ~ nasa_lpg$lpg_20162017) #sig p!
wilcox.test(nasa_lpg$q207_edu_noformal ~ nasa_lpg$lpg_20162017) #sig p! 
describe(nasa_lpg2015before$q207_edu_noformal) #avg .2
describe(nasa_lpg20162017$q207_edu_noformal) #avg .38

t.test(nasa_lpg$q207_edu_primarysecondary ~ nasa_lpg$lpg_20162017) #sig p!
wilcox.test(nasa_lpg$q207_edu_primarysecondary ~ nasa_lpg$lpg_20162017) #sig p! 
describe(nasa_lpg2015before$q207_edu_primarysecondary) #avg .34
describe(nasa_lpg20162017$q207_edu_primarysecondary) #avg .4

t.test(nasa_lpg$q207_edu_hs ~ nasa_lpg$lpg_20162017) #sig p!
wilcox.test(nasa_lpg$q207_edu_hs ~ nasa_lpg$lpg_20162017) #sig p! 
describe(nasa_lpg2015before$q207_edu_hs) #avg .18
describe(nasa_lpg20162017$q207_edu_hs) #avg .12


t.test(nasa_lpg$q207_edu_intermediateabove ~ nasa_lpg$lpg_20162017) #sig p!
wilcox.test(nasa_lpg$q207_edu_intermediateabove ~ nasa_lpg$lpg_20162017) #sig p! 
describe(nasa_lpg2015before$q207_edu_intermediateabove) #avg .29
describe(nasa_lpg20162017$q207_edu_intermediateabove) #avg .09

t.test(nasa_lpg$q608_bankamount.bi ~ nasa_lpg$lpg_20162017) #sig p! 
wilcox.test(nasa_lpg$q608_bankamount.bi ~ nasa_lpg$lpg_20162017) #sig p! 
describe(nasa_lpg2015before$q608_bankamount.bi) #mean .48 (48%)
describe(nasa_lpg20162017$q608_bankamount.bi) #mean 0.39 (39%)

t.test(nasa_lpg$q609_savingamount.bi ~ nasa_lpg$lpg_20162017) #sig p! 
wilcox.test(nasa_lpg$q609_savingamount.bi ~ nasa_lpg$lpg_20162017) #sig p! 
describe(nasa_lpg2015before$q609_savingamount.bi) #mean .4 (40%)
describe(nasa_lpg20162017$q609_savingamount.bi) #mean .3 (30%)

t.test(nasa_lpg$logq605_montlyexp.std ~ nasa_lpg$lpg_20162017) #sig p!
wilcox.test(nasa_lpg$logq605_montlyexp.std ~ nasa_lpg$lpg_20162017) #sig p!
describe(nasa_lpg2015before$q605_montlyexp) #mean 5514.79 
describe(nasa_lpg20162017$q605_montlyexp) #mean 3743.61

#Hansen t tests
t.test(nasa_lpg$Hansen1km ~ nasa_lpg$lpg_20162017) 
wilcox.test(nasa_lpg$Hansen1km ~ nasa_lpg$lpg_20162017)
describe(nasa_lpg2015before$Hansen1kmog) 
describe(nasa_lpg20162017$Hansen1kmog)

t.test(nasa_lpg$Hansen2km ~ nasa_lpg$lpg_20162017) 
wilcox.test(nasa_lpg$Hansen2km ~ nasa_lpg$lpg_20162017) 
describe(nasa_lpg2015before$Hansen2kmog) 
describe(nasa_lpg20162017$Hansen2kmog)

t.test(nasa_lpg$Hansen2.74km ~ nasa_lpg$lpg_20162017) 
wilcox.test(nasa_lpg$Hansen2.74km ~ nasa_lpg$lpg_20162017) 
describe(nasa_lpg2015before$Hansen2.74kmog) 
describe(nasa_lpg20162017$Hansen2.74kmog) 

t.test(nasa_lpg$Hansen3km ~ nasa_lpg$lpg_20162017) 
wilcox.test(nasa_lpg$Hansen3km ~ nasa_lpg$lpg_20162017) 
describe(nasa_lpg2015before$Hansen3kmog) 
describe(nasa_lpg20162017$Hansen3kmog) 

t.test(nasa_lpg$Hansen5km ~ nasa_lpg$lpg_20162017) 
wilcox.test(nasa_lpg$Hansen5km ~ nasa_lpg$lpg_20162017) 
describe(nasa_lpg2015before$Hansen5kmog) 
describe(nasa_lpg20162017$Hansen5kmog) 

t.test(nasa_lpg$Hansen8km ~ nasa_lpg$lpg_20162017) 
wilcox.test(nasa_lpg$Hansen8km ~ nasa_lpg$lpg_20162017) 
describe(nasa_lpg2015before$Hansen8kmog) 
describe(nasa_lpg20162017$Hansen8kmog) 

t.test(nasa_lpg$Hansen10km ~ nasa_lpg$lpg_20162017) 
wilcox.test(nasa_lpg$Hansen10km ~ nasa_lpg$lpg_20162017) 
describe(nasa_lpg2015before$Hansen10kmog) 
describe(nasa_lpg20162017$Hansen10kmog) 

# 3.4 Why in 2013 or before, 2014/2015, and 2016/2017?-----
nasa$lpgwhenfactored <- with(nasa, ifelse(nasa$lpgwhen %in% c(1,2), 1, ifelse(nasa$lpgwhen %in% c(3,4), 2, ifelse(nasa$lpgwhen %in% c(5,6), 3, 0))))

table(nasa$lpgwhenfactored) #0 no LPG, 1 2013 or before, 2 2014 - 2015, 3 2016 - 2017
nasa$lpgwhenfactored <- factor(nasa$lpgwhenfactored)
levels(nasa$lpgwhenfactored)

anova <- aov(q502a_firewood_hoursweek_3season ~ lpgwhenfactored, data=nasa)
summary(anova) #sig diff
TukeyHSD(anova, conf.level=.95) # only 2 and 1 are not different

anova1 <- aov(q507_lpgcooking ~ lpgwhenfactored, data=nasa)
summary(anova1) #sig diff
TukeyHSD(anova1, conf.level=.95) 

anova2 <- aov(q207_levelschooling ~ lpgwhenfactored, data=nasa) #1 = no formal, 2 = primary + secondary, 3 = hs and above
summary(anova2) #sig diff
TukeyHSD(anova2, conf.level=.95)

anova3 <- aov(q201_gender ~ lpgwhenfactored, data=nasa) 
summary(anova3) #sig diff
TukeyHSD(anova3, conf.level=.95)#some sig diff

anova4 <- aov(hhheadfemale ~ lpgwhenfactored, data=nasa) 
summary(anova4) #sig diff
TukeyHSD(anova4, conf.level=.95) #only 2 and 0 are different

anova5 <- aov(q206_caste ~ lpgwhenfactored, data=nasa) 
summary(anova5) #sig diff
TukeyHSD(anova5, conf.level=.95) 

anova6 <- aov(q605_montlyexp ~ lpgwhenfactored, data=nasa) 
summary(anova6) #sig diff
TukeyHSD(anova6, conf.level=.95)

anova7 <- aov(Hansen3kmog ~ lpgwhenfactored, data=nasa) 
summary(anova7) #
TukeyHSD(anova7, conf.level=.95) # 

anova8_8 <- aov(road_osm ~ lpgwhenfactored, data=nasa) 
summary(anova8_8) #sig diff
TukeyHSD(anova8_8, conf.level=.95)

anova8 <- aov(Road_Near ~ lpgwhenfactored, data=nasa) 
summary(anova8) #NOT sig diff
TukeyHSD(anova8, conf.level=.95)

anova9 <- aov(q609_savingamount.bi ~ lpgwhenfactored, data=nasa) 
summary(anova9) #sig diff
TukeyHSD(anova9, conf.level=.95) #3-2 and 2-0 not sig diff

anova10 <- aov(q607_bankacct ~ lpgwhenfactored, data=nasa) 
summary(anova10) #sig diff
TukeyHSD(anova10, conf.level=.95) #only 1-0 and 3-0 ARE sig diff

# 5. Model and figures ------

# 5.1 Variable modification (monthly expend, Hansen 2.74, and distance to road) -------
class(nasa$logq605_montlyexp.std)
nasa$logq605_montlyexp.std <- as.numeric(nasa$logq605_montlyexp.std)
class(nasa$logq605_montlyexp.std)

nasa$Hansen2.74km <- as.numeric(nasa$Hansen2.74km)
class(nasa$Hansen2.74km)

nasa$logroad_osm.std <- as.numeric(nasa$logroad_osm.std)
class(nasa$logroad_osm.std)

# 5.2 Model 1/Figure 2 ------
#----LPG cooking ~ 
#Hansen2.74km
LPGmodel1repop3km <- glm(q507_lpgcooking ~
                             logq605_montlyexp.std +
                             q608_bankamount.bi +
                             q609_savingamount.bi +  
                             q201_gender +  
                             q206_caste_otherbackward +
                             q206_caste_scheduledcaste +
                             q206_caste_scheduledtribe +
                             q207_edu_primarysecondary +
                             q207_edu_hs +
                             q207_edu_intermediateabove +
                             q512_firewoodcollectioncompare.no_123 +
                             Hansen2.74km +
                             logroad_osm.std +
                            factor(district_anonymous)
                         , data = nasa, family = "binomial") 

summary(LPGmodel1repop3km) #AIC 6540.34

exp(coef(LPGmodel1repop3km))
exp(confint(LPGmodel1repop3km))
 
#-----ST WD for Figures
#set wd to Desktop for destination of figures
setwd("~/Desktop")

#---marginal effects for model 1
x <- margins(LPGmodel1repop3km)
x <- summary(x)
x
x$factor

x <- x[c(1,2,3,35,36,37,38,39,40,41,42,43,44),]
x$factor

x <- x[c(3, 1, 11, 9, 8, 10, 7, 6, 5, 4, 13, 12, 2),]
x$factor
x$factor <- factor(x$factor, levels = x$factor)
x$factor
x
x$AMEnew <- 100*x$AME
x$uppernew <- 100*x$upper
x$lowernew <- 100*x$lower
x

tiff("figure2marginal.tiff", units="in", width=10, height=8, res=300)

xplot <- ggplot(data = x) +
  geom_point(aes(factor, AMEnew)) +
  geom_pointrange(aes(x = factor, y = AMEnew, ymin = lowernew, ymax = uppernew))+
  geom_hline(yintercept = 0, colour="grey60", linetype=2) +
  theme_classic() +
  theme(plot.title=element_text(size=12),
        plot.subtitle = element_text(size=12),
        axis.text = element_text(size=15),
        axis.title = element_text(size=15),
        legend.position = c(0.84,0.95),
        legend.title = element_blank(),
        legend.text = element_text(size=12),
        text=element_text(family="Times New Roman", size=12)) + 
  scale_x_discrete(labels=c(logq605_montlyexp.std = "Monthly expenditure (log)",
                            q608_bankamount.bi = "Money in a bank account?",
                            q609_savingamount.bi = "Has saved money?",
                            q201_gender = "Woman as household head?",
                            q206_caste_otherbackward = "Caste Status: OBC?",
                            q206_caste_scheduledcaste = "Caste Status: SC?",
                            q206_caste_scheduledtribe = "Caste Status: ST?",
                            q207_edu_primarysecondary = "Education: Primary/Secondary?",
                            q207_edu_hs = "Education: High school?",
                            q207_edu_intermediateabove = "Education: Intermediate and above?",
                            q512_firewoodcollectioncompare.no_123 = "Increased difficulty collecting firewood?", 
                            Hansen2.74km = "Tree cover (log)", 
                            logroad_osm.std = "Distance to road (log)"))


xplot + coord_flip() + 
  xlab("") + ylab("\nAverage Marginal Effect")  

dev.off()

#model 1 w/out bank account variable
LPGmodel1repop3kmbank <- glm(q507_lpgcooking ~
                           logq605_montlyexp.std +

                           q609_savingamount.bi +  
                           q201_gender +  
                           q206_caste_otherbackward +
                           q206_caste_scheduledcaste +
                           q206_caste_scheduledtribe +
                           q207_edu_primarysecondary +
                           q207_edu_hs +
                           q207_edu_intermediateabove +
                           q512_firewoodcollectioncompare.no_123 +
                           Hansen2.74km +
                           logroad_osm.std +
                           factor(district_anonymous)
                         , data = nasa, family = "binomial") 

summary(LPGmodel1repop3kmbank)
 
tidyfig2 <- tidy(LPGmodel1repop3km)

# 5.2.2 Predicted probab of Figure 2 for continuous variables -------
predict(LPGmodel1repop3km)

moneyall <- cplot(LPGmodel1repop3km, "logq605_montlyexp.std")

tiff("figure2moneyprob.tiff", units="in", width=8, height=8, res=300)

ggplot(data=moneyall, aes(x = xvals)) + 
         geom_line(aes(y = yvals)) +
         geom_line(aes(y = upper), linetype = 2) +
         geom_line(aes(y = lower), linetype = 2) +
         geom_hline(yintercept = 0, linetype =2, color="gray") +
  geom_rug(data=tidyfig2, 
           aes(x=estimate), col="black", sides="b") +
theme_classic() +
  theme(axis.text = element_text(size=35),
        axis.title = element_text(size=35),
        text=element_text(family="Times New Roman", size=35)) +
  scale_y_continuous(limits= c(0, 1),
                     breaks=c(0, 0.25, .5, 0.75, 1),
                     labels=c("0%", "25%","50%","75%", "100%")) +
scale_x_continuous(limits= c(-8, 4),
                 breaks= c(-8, -4, 0, 4)) +
  xlab("Monthly expenditure (log)") + ylab("") +
  geom_hline(yintercept = 0.25, linetype=2, color="gray") + 
  geom_hline(yintercept = 1, linetype=2, color="gray") + 
  geom_hline(yintercept = 0.5, linetype=2, color="gray") + 
  geom_hline(yintercept = 0.75, linetype=2, color="gray")

dev.off()

treeall <- cplot(LPGmodel1repop3km, "Hansen2.74km", 
                 draw = F)

tiff("figure2treeprob.tiff", units="in", width=8, height=8, res=300)

ggplot(data=treeall, aes(x = xvals)) + 
  geom_line(aes(y = yvals)) +
  geom_line(aes(y = upper), linetype = 2) +
  geom_line(aes(y = lower), linetype = 2) +
  geom_hline(yintercept = 0, linetype =2, color="gray") +
  geom_rug(data=tidyfig2, 
           aes(x=estimate), col="black", sides="b") +
  theme_classic() +
  theme(axis.text = element_text(size=35),
        axis.title = element_text(size=35),
        text=element_text(family="Times New Roman", size=35)) +
  scale_y_continuous(limits= c(0, 1),
                     breaks=c(0, 0.25, .5, 0.75, 1),
                     labels=c("0%", "25%","50%","75%", "100%")) +
  scale_x_continuous(limits= c(-2, 2),
                     breaks= c(-2, 0, 2)) +
  xlab("Tree cover (log)") +  ylab("") +
  geom_hline(yintercept = 0.25, linetype=2, color="gray") + 
  geom_hline(yintercept = 1, linetype=2, color="gray") + 
  geom_hline(yintercept = 0.5, linetype=2, color="gray") + 
  geom_hline(yintercept = 0.75, linetype=2, color="gray")

dev.off()

roadall <- cplot(LPGmodel1repop3km, "logroad_osm.std")
tiff("figure2roadprob.tiff", units="in", width=8, height=8, res=300)

ggplot(data=roadall, aes(x = xvals)) + 
  geom_line(aes(y = yvals)) +
  geom_line(aes(y = upper), linetype = 2) +
  geom_line(aes(y = lower), linetype = 2) +
  geom_hline(yintercept = 0, linetype =2, color="gray") +
  geom_rug(data=tidyfig2, 
           aes(x=estimate), col="black", sides="b") +
  theme_classic() +
  theme(axis.text = element_text(size=35),
        axis.title = element_text(size=35),
        text=element_text(family="Times New Roman", size=35)) +
  scale_y_continuous(limits= c(0, 1),
                     breaks=c(0, 0.25, .5, 0.75, 1),
                     labels=c("0%", "25%","50%","75%", "100%")) +
  scale_x_continuous(breaks= c(-1, 0, 1, 2), limits = c(-1 , 2)) +
  xlab("Distance to road (log)") +   ylab("")+
  geom_hline(yintercept = 0.25, linetype=2, color="gray") + 
  geom_hline(yintercept = 1, linetype=2, color="gray") + 
  geom_hline(yintercept = 0.5, linetype=2, color="gray") + 
  geom_hline(yintercept = 0.75, linetype=2, color="gray")
dev.off()

# 5.3 Model 2/Figure 3 ------

#----subset only LPG OWNERS
nasalpgown <- subset(nasa, nasa$q507_lpgcooking==1)
table(nasalpgown$lpg_20162017) #N = 2276
table(nasalpgown$lpg_2017) # 1 = 1240

describe(nasalpgown$logq605_montlyexp.std)
describe(nasa$logq605_montlyexp.std)

describe(nasalpgown$Hansen2.74km)
describe(nasa$Hansen2.74km)

describe(nasalpgown$logroad_osm.std)
describe(nasa$logroad_osm.std)

LPGownmodel <- glm(lpg_20162017 ~
                     logq605_montlyexp.std +
                     q608_bankamount.bi +
                     q609_savingamount.bi +  
                     q201_gender +  
                     q206_caste_otherbackward +
                     q206_caste_scheduledcaste +
                     q206_caste_scheduledtribe +
                     q207_edu_primarysecondary +
                     q207_edu_hs +
                     q207_edu_intermediateabove +
                     q512_firewoodcollectioncompare.no_123 +
                     Hansen2.74km +
                     logroad_osm.std +
                     factor(district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodel)

exp(coef(LPGownmodel))
exp(confint(LPGownmodel))

#----marginal effects LPG 2016, 2017
y <- summary(margins(LPGownmodel))
y$factor

y <- y[c(1,2,3,35,36,37,38,39,40,41,42,43,44),]
y$factor

y <- y[c(3, 1, 11, 9, 8, 10, 7, 6, 5, 4, 13, 12, 2),]
y$factor
y$factor <- factor(y$factor, levels = y$factor)
y$factor

y$AMEnew <- 100*y$AME
y$uppernew <- 100*y$upper
y$lowernew <- 100*y$lower
y
tiff("figure320162017marginal.tiff", units="in", width=10, height=8, res=300)

yplot <- ggplot(data = y) +
  geom_point(aes(factor, AMEnew)) +
  geom_pointrange(aes(x = factor, y = AMEnew, ymin = lowernew, ymax = uppernew)) +
  geom_hline(yintercept = 0, colour="grey60", linetype=2) +
  theme_classic() +
  theme(plot.title=element_text(size=12),
        plot.subtitle = element_text(size=12),
        axis.text = element_text(size=15),
        axis.title = element_text(size=15),
        text=element_text(family="Times New Roman", size=12)) + 
  scale_x_discrete(labels = c(logq605_montlyexp.std = "Monthly expenditure (log)",
                              q608_bankamount.bi = "Money in a bank account?",
                              q609_savingamount.bi = "Has saved money?",
                              q201_gender = "Woman as household head?",
                              q206_caste_otherbackward = "Caste Status: OBC?",
                              q206_caste_scheduledcaste = "Caste Status: SC?",
                              q206_caste_scheduledtribe = "Caste Status: ST?",
                              q207_edu_primarysecondary = "Education: Primary/Secondary?",
                              q207_edu_hs = "Education: High school?",
                              q207_edu_intermediateabove = "Education: Intermediate and above?",
                              q512_firewoodcollectioncompare.no_123 = "Increased difficulty collecting firewood?", 
                              Hansen2.74km = "Tree cover (log)", 
                              logroad_osm.std = "Distance to road (log)"))


yplot + coord_flip() + 
  xlab("") + ylab("\nAverage Marginal Effect") +
  geom_vline(xintercept=0, colour="grey60", linetype=2)


dev.off()

#model 2 without bank
LPGownmodelbank <- glm(lpg_20162017 ~
                     logq605_montlyexp.std +
                    
                     q609_savingamount.bi +  
                     q201_gender +  
                     q206_caste_otherbackward +
                     q206_caste_scheduledcaste +
                     q206_caste_scheduledtribe +
                     q207_edu_primarysecondary +
                     q207_edu_hs +
                     q207_edu_intermediateabove +
                     q512_firewoodcollectioncompare.no_123 +
                     Hansen2.74km +
                     logroad_osm.std +
                     factor(district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodelbank)

# 5.3.2 Predicted probab of Figure 3 for continuous variables -------
tidyfig3 <- tidy(LPGownmodel)

money <- cplot(LPGownmodel, "logq605_montlyexp.std")
tiff("figure3moneyprob.tiff", units="in", width=8, height=8, res=300)

ggplot(data=money, aes(x = xvals)) + 
  geom_line(aes(y = yvals)) +
  geom_line(aes(y = upper), linetype = 2) +
  geom_line(aes(y = lower), linetype = 2) +
  geom_rug(data=tidyfig3, 
           aes(x=estimate), col="black", sides="b") +
  theme_classic() +
  theme(axis.text = element_text(size=35),
        axis.title = element_text(size=35),
        text=element_text(family="Times New Roman", size=35)) +
  scale_y_continuous(limits= c(0, 1),
                      breaks=c(0, 0.25, .5, 0.75, 1),
                      labels=c("0%", "25%","50%","75%", "100%")) +
  scale_x_continuous(limits= c(-8, 4),
                     breaks= c(-8, -4, 0, 4)) +
  xlab("Monthly expenditure (log)") +  ylab("") +
  geom_hline(yintercept = 1, linetype=2, color="gray") + 
  geom_hline(yintercept = 0, linetype =2, color="gray") +
  geom_hline(yintercept = 0.5, linetype=2, color="gray") + 
  geom_hline(yintercept = 0.75, linetype=2, color="gray") +
  geom_hline(yintercept = 0.25, linetype=2, color="gray") 
dev.off()


tree <- cplot(LPGownmodel, "Hansen2.74km")

tiff("figure3treeprob.tiff", units="in", width=8, height=8, res=300)

ggplot(data=tree, aes(x = xvals)) + 
  geom_line(aes(y = yvals)) +
  geom_line(aes(y = upper), linetype = 2) +
  geom_line(aes(y = lower), linetype = 2) +
  geom_rug(data=tidyfig3, 
           aes(x=estimate), col="black", sides="b") +
  theme_classic() +
  theme(axis.text = element_text(size=35),
        axis.title = element_text(size=35),
        text=element_text(family="Times New Roman", size=35)) +
  scale_y_continuous(limits= c(0, 1),
                     breaks=c(0, 0.25, .5, 0.75, 1),
                     labels=c("0%", "25%","50%","75%", "100%")) +
  scale_x_continuous(limits= c(-2, 2),
                     breaks= c(-2, 0, 2)) +
  xlab("Tree cover (log)") + ylab("") +
  geom_hline(yintercept = 1, linetype=2, color="gray") + 
  geom_hline(yintercept = 0, linetype =2, color="gray") +
  geom_hline(yintercept = 0.25, linetype=2, color="gray") +
  geom_hline(yintercept = 0.5, linetype=2, color="gray") + 
  geom_hline(yintercept = 0.75, linetype=2, color="gray")
dev.off()

road <- cplot(LPGownmodel, "logroad_osm.std")

tiff("figure3roadprob.tiff", units="in", width=8, height=8, res=300)

ggplot(data=road, aes(x = xvals)) + 
  geom_line(aes(y = yvals)) +
  geom_line(aes(y = upper), linetype = 2) +
  geom_line(aes(y = lower), linetype = 2) +
  geom_rug(data=tidyfig3, 
           aes(x=estimate), col="black", sides="b") +
  theme_classic() +
  theme(axis.text = element_text(size=35),
        axis.title = element_text(size=35),
        text=element_text(family="Times New Roman", size=35)) +
  scale_y_continuous(limits= c(0, 1),
                     breaks=c(0, 0.25, .5, 0.75, 1),
                     labels=c("0%", "25%","50%","75%", "100%")) +
  scale_x_continuous(breaks= c(-1, 0, 1, 2), limits = c(-1 , 2)) +
  xlab("Distance to road (log)") + ylab ("") +
  geom_hline(yintercept = 1, linetype=2, color="gray") + 
  geom_hline(yintercept = 0, linetype =2, color="gray") +
  geom_hline(yintercept = 0.25, linetype=2, color="gray") +
  geom_hline(yintercept = 0.5, linetype=2, color="gray") + 
  geom_hline(yintercept = 0.75, linetype=2, color="gray")
dev.off()
# 5.4 Model 3/Figure 4 ------

#-----FUELWOOD HRS/WK ~ 
par(mfrow=c(1,1))
#avg. 3 seasons
lin.Hansen3kmlm <- lm(logq502a_firewood_hoursweek_3season ~ 
                        lpg_2013orbefore + 
                        lpg_20142015 + 
                        lpg_20162017 + 
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q609_savingamount.bi +
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen2.74km +
                        logroad_osm.std +
                        factor(district_anonymous)
                      , data = nasa)

summary(lin.Hansen3kmlm)

#without bank 
lin.Hansen3kmlmbank <- lm(logq502a_firewood_hoursweek_3season ~ 
                        lpg_2013orbefore + 
                        lpg_20142015 + 
                        lpg_20162017 + 
                        logq605_montlyexp.std +
                       
                        q609_savingamount.bi +
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen2.74km +
                        logroad_osm.std +
                        factor(district_anonymous)
                      , data = nasa)

summary(lin.Hansen3kmlmbank)

#-----exponentiated avg 3 seasons  
c <- tidy(lin.Hansen3kmlm)
c
c <- c[2:17,]
c
c <- c[16:1,]
c
c$term <- factor(c$term, levels = c$term)
c$term

cnew <- as.data.frame(c)
cnew

cnew$estimate
cnew$estimatenew <- 100*(1-(exp(cnew$estimate)))*-1
confint <- exp(confint(lin.Hansen3kmlm))
confint <- confint[2:17,]
confint <- confint[16:1,]

cnew <- cbind(cnew, confint)
cnew
cnew$upper <- 100*(1-(cnew$`97.5 %`))*-1
cnew$lower <- 100*(1-(cnew$`2.5 %`))*-1
cnew$term
cnew

#-----MONSOON

#Hansen 3km
lin.Hansen3kmmonlm <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                        lpg_2013orbefore + 
                        lpg_20142015 + 
                        lpg_20162017 + 
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q609_savingamount.bi +
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen2.74km +
                        logroad_osm.std +
                        factor(district_anonymous)
                      , data = nasa)

summary(lin.Hansen3kmmonlm)
confint(lin.Hansen3kmmonlm)

#without bank
lin.Hansen3kmmonlmbank <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                           lpg_2013orbefore + 
                           lpg_20142015 + 
                           lpg_20162017 + 
                           logq605_montlyexp.std +
                         
                           q609_savingamount.bi +
                           q201_gender +  
                           q206_caste_otherbackward +
                           q206_caste_scheduledcaste +
                           q206_caste_scheduledtribe +
                           q207_edu_primarysecondary +
                           q207_edu_hs +
                           q207_edu_intermediateabove +
                           q512_firewoodcollectioncompare.no_123 +
                           Hansen2.74km +
                           logroad_osm.std +
                           factor(nasa$district_anonymous)
                         , data = nasa)

summary(lin.Hansen3kmmonlmbank)

##----exponentiated monsoon
m <- tidy(lin.Hansen3kmmonlm)
m
m <- m[2:17,]
m
m <- m[16:1,]
m
m$term <- factor(m$term, levels = m$term)
m$term

mnew <- as.data.frame(m)
mnew

mnew$estimate
mnew$estimatenew <- 100*(1-(exp(mnew$estimate)))*-1
confintm <- exp(confint(lin.Hansen3kmmonlm))
confintm <- confintm[2:17,]
confintm <- confintm[16:1,]

mnew <- cbind(mnew, confintm)
mnew
mnew$upper <- 100*(1-(mnew$`97.5 %`))*-1
mnew$lower <- 100*(1-(mnew$`2.5 %`))*-1
mnew$term
mnew

#exponentiated model, equation 3, figure 4

# mnew is the dataframe for monsoon exponentiated values 
# cnew is the dataframe for exponentiated values for the average of 3 seasons model 

xplot_df <- (mnew %>% mutate(model = "Monsoon")) %>%
  bind_rows(cnew %>% mutate(model = "Average 3 Seasons"))

tiff("figure4exponentiatedFINAL.tiff", units="in", width=10, height=8, res=300)

ggplot(data = xplot_df, aes (x=estimatenew/100, y = term, xmin = lower/100, xmax = upper/100, shape = model, color = model, group = model, linetype=model)) +
  geom_point(position = position_dodge(width = .5), size=2) +
  geom_errorbar(position = position_dodge(width = .5), size=1, width = 0) + 
  scale_color_manual(values=c("black", "grey70")) +
  scale_shape_manual(values=c(16,15)) +
  scale_linetype_manual(values=c(1,2)) + 
  geom_vline(xintercept = 0, colour="grey60", linetype=2) +
  xlab("Percent Change in Time Spent Collecting Fuelwood") + 
  ylab("") +
  scale_x_continuous(breaks=(seq(-0.75, 0.75, 0.25)), labels = scales::percent_format()) +
  scale_y_discrete(labels=c(lpg_2013orbefore = "Adopted LPG in 2013 or before?",
                            lpg_20142015 = "Adopted LPG in 2014 or 2015?",
                            lpg_20162017 = "Adopted LPG in 2016 or 2017?", 
                            logq605_montlyexp.std = "Monthly expenditure (log)",
                            q608_bankamount.bi = "Money in a bank account?",
                            q609_savingamount.bi = "Has saved money?",
                            q201_gender = "Woman as household head?",
                            q206_caste_otherbackward = "Caste Status: OBC?",
                            q206_caste_scheduledcaste = "Caste Status: SC?",
                            q206_caste_scheduledtribe = "Caste Status: ST?",
                            q207_edu_primarysecondary = "Education: Primary/Secondary?",
                            q207_edu_hs = "Education: High school?",
                            q207_edu_intermediateabove = "Education: Intermediate and above?",
                            q512_firewoodcollectioncompare.no_123 = "Increased difficulty collecting firewood?", 
                            Hansen2.74km = "Tree cover (log)", 
                            logroad_osm.std = "Distance to road (log)")) + 
  theme_classic() + 
  theme(plot.title=element_text(size=12),
        plot.subtitle = element_text(size=12),
        axis.text = element_text(size=15),
        axis.title = element_text(size=15),
        legend.position = c(0.84,0.95),
        legend.title = element_blank(),
        legend.text = element_text(size=16),
        text=element_text(family="Times New Roman", size=12)) 


dev.off()

# 6. Additional figures/tables in text ------
# 6.1 Table 3 -----
#Summarizing household characteristics by year of LPG adoption

nasa$lpg_when_category <- ifelse(nasa$lpg_2013orbefore==1, "2013 or before", 
                                 ifelse(nasa$lpg_20142015==1, "2014-2015",
                                        ifelse(nasa$lpg_20162017==1, "2016-2017", "No LPG")))

nasa$education_category <- ifelse(nasa$q207_edu_noformal==1, "No Formal Education",
                                  ifelse(nasa$q207_edu_primarysecondary==1, "Primary/Secondary",
                                         ifelse(nasa$q207_edu_hs==1, "High School",
                                                ifelse(nasa$q207_edu_intermediateabove==1, "Intermediate or Greater", 0))))

mycontrols  <- tableby.control(test=TRUE, total=FALSE,
                               numeric.stats=c("N", "meansd"),
                               cat.stats=c("countpct"),
                               stats.labels=list(N='Count', mean="Mean (SD)", medianq1q3="Median (IQR)"),
                               digits.pct = 0L,
                               digits = 2L)

descriptives <- tableby(lpg_when_category ~
                          q202_age + 
                          factor(q201_gender) +
                          factor(hhheadfemale) +
                          education_category +
                          factor(q206_caste) +
                          q605_montlyexp +
                          Hansen3kmog +
                          road_osm +
                          factor(q609_savingamount.bi) +
                          factor(q607_bankacct),
                        data = nasa, control = mycontrols)
summary(descriptives)
write2word(descriptives, "/Users/sarikakhanwilkar/Desktop/table2.doc")

# 6.2 Table 4 ------
# summary of hh firewood collection by season 
mycontrols  <- tableby.control(test=TRUE, total=FALSE,
                               numeric.stats=c("N", "meansd", "medianq1q3"),
                               cat.stats=c("countpct"),
                               stats.labels=list(N='Count', mean="Mean (SD)", medianq1q3="Median (IQR)"),
                               digits.pct = 0L,
                               digits = 2L)

firewood_collection_summer <- tableby(factor(q502a_summer_firewood_yesno)  ~
                                        factor(q502e_summer_whyfor) + 
                                        q502a_summer_firewood_daysweek + 
                                        q502b_summer_firewood_hoursday + 
                                        q502a_summer_firewood_hoursweek, 
                                      data = nasa, control = mycontrols)

summary(firewood_collection_summer)
write2word(firewood_collection_summer, "/Users/sarikakhanwilkar/Desktop/firewoodsummer.doc")

firewood_collection_rabi <- tableby(factor(q502a_rabi_firewood_yesno)  ~ 
                                      factor(q502e_rabi_whyfor) + 
                                      q502a_rabi_firewood_daysweek + 
                                      q502b_rabi_firewood_hoursday + 
                                      q502a_rabi_firewood_hoursweek, 
                                    data = nasa, control = mycontrols)

summary(firewood_collection_rabi)
write2word(firewood_collection_rabi, "/Users/sarikakhanwilkar/Desktop/firewoodrabi.doc")

firewood_collection_winter <- tableby(factor(q502a_winter_firewood_yesno)  ~
                                        factor(q502e_winter_whyfor) + 
                                        q502a_winter_firewood_daysweek + 
                                        q502b_winter_firewood_hoursday + 
                                        q502a_winter_firewood_hoursweek, 
                                      data = nasa, control = mycontrols)
summary(firewood_collection_winter)
write2word(firewood_collection_winter, "/Users/sarikakhanwilkar/Desktop/firewoodwinter.doc")

firewood_collection_monsoon <- tableby(factor(q502a_monsoon_firewood_yesno)  ~
                                         factor(q502e_monsoon_whyfor) + 
                                         q502a_monsoon_firewood_daysweek + 
                                         q502b_monsoon_firewood_hoursday + 
                                         q502a_monsoon_firewood_hoursweek, 
                                       data = nasa, control = mycontrols)
summary(firewood_collection_monsoon)
write2word(firewood_collection_monsoon, "/Users/sarikakhanwilkar/Desktop/firewoodmonsoon.doc")

# 6.3 Table 5 ------
#hours of firewood collection by timing of LPG adoption
nasa$q502a_rabi_firewood_yesno <- ifelse((nasa$q502a_rabi_firewood_daysweek==0), 0, 1)
table(nasa$q502a_rabi_firewood_yesno)
describe(nasa$q502a_rabi_firewood_yesno)

nasa$q502a_monsoon_firewood_yesno <- ifelse((nasa$q502a_monsoon_firewood_daysweek==0), 0, 1)
table(nasa$q502a_monsoon_firewood_yesno)

nasa$q502a_summer_firewood_yesno <- ifelse((nasa$q502a_summer_firewood_daysweek==0), 0, 1)
table(nasa$q502a_summer_firewood_yesno)
nasa$q502a_winter_firewood_yesno <- ifelse((nasa$q502a_winter_firewood_daysweek==0), 0, 1)
table(nasa$q502a_winter_firewood_yesno)
nasahard <- subset(nasa, q512_firewoodcollectioncompare.no_123 == 1, select = c("Hansen3kmog", "Hansen2kmog"))
nasanohard <- subset(nasa, q512_firewoodcollectioncompare.no_123 == 0, select = c("Hansen3kmog", "Hansen2kmog"))

nasa$lpg_when_category <- ifelse(nasa$lpg_2013orbefore==1, "2013 or before", 
                                 ifelse(nasa$lpg_20142015==1, "2014-2015",
                                        ifelse(nasa$lpg_20162017==1, "2016-2017", "No LPG")))

mycontrols  <- tableby.control(test=TRUE, total=FALSE,
                               numeric.stats=c("N", "meansd", "median", "q1q3"),
                               cat.stats=c("countpct"),
                               stats.labels=list(N='Count', mean="Mean (SD)", medianq1q3="Median (IQR)"),
                               digits.pct = 0L,
                               digits = 2L)

nasamonsoon <- subset(nasa, q502a_monsoon_firewood_yesno==1) #hh who collect fw in monsoon only

descriptives <- tableby(lpg_when_category ~
                          q502a_firewood_hoursweek_3season +
                          q502a_monsoon_firewood_hoursweek,
                        data = nasamonsoon, control = mycontrols)
summary(descriptives)
write2word(descriptives, "/Users/sarikakhanwilkar/Desktop/table4monsoonv2.doc")

nasaother <- subset(nasa, consistent3season==1) #hh who collect fw consistently in other seasons

descriptives1 <- tableby(lpg_when_category ~
                           q502a_firewood_hoursweek_3season +
                           q502a_monsoon_firewood_hoursweek,
                         data = nasaother, control = mycontrols)
summary(descriptives1)
write2word(descriptives1, "/Users/sarikakhanwilkar/Desktop/table4otherseasonsv2.doc")

# 6.4 Table S4 ------
#model results with and without District Fixed Effects

#model 1
#og model 
LPGmodel1repop3km <- glm(q507_lpgcooking ~
                           logq605_montlyexp.std +
                           q608_bankamount.bi +
                           q609_savingamount.bi +  
                           q201_gender +  
                           q206_caste_otherbackward +
                           q206_caste_scheduledcaste +
                           q206_caste_scheduledtribe +
                           q207_edu_primarysecondary +
                           q207_edu_hs +
                           q207_edu_intermediateabove +
                           q512_firewoodcollectioncompare.no_123 +
                           Hansen2.74km +
                           logroad_osm.std +
                           factor(nasa$district_anonymous), data = nasa, family = "binomial") 

summary(LPGmodel1repop3km) #AIC 6540.34

#WIHOUT disricts FEs
#Hansen3km
LPGmodel1repop3km_fe <- glm(q507_lpgcooking ~
                              logq605_montlyexp.std +
                              q608_bankamount.bi +
                              q609_savingamount.bi +  
                              q201_gender +  
                              q206_caste_otherbackward +
                              q206_caste_scheduledcaste +
                              q206_caste_scheduledtribe +
                              q207_edu_primarysecondary +
                              q207_edu_hs +
                              q207_edu_intermediateabove +
                              q512_firewoodcollectioncompare.no_123 +
                              Hansen2.74km +
                              logroad_osm.std, data = nasa, family = "binomial") 

summary(LPGmodel1repop3km_fe) #AIC 6677.35

#og
LPGownmodel <- glm(lpg_20162017 ~
                     logq605_montlyexp.std +
                     q608_bankamount.bi +
                     q609_savingamount.bi +  
                     q201_gender +  
                     q206_caste_otherbackward +
                     q206_caste_scheduledcaste +
                     q206_caste_scheduledtribe +
                     q207_edu_primarysecondary +
                     q207_edu_hs +
                     q207_edu_intermediateabove +
                     q512_firewoodcollectioncompare.no_123 +
                     Hansen2.74km +
                     logroad_osm.std +
                     factor(nasalpgown$district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodel) #AIC: 2150.86

#without FEs
LPGownmodel_fe <- glm(lpg_20162017 ~
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q609_savingamount.bi +  
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen2.74km +
                        logroad_osm.std, data = nasalpgown, family = "binomial") 

summary(LPGownmodel_fe) #AIC: 2219

#model 3
#og 3 seasons
lin.Hansen3kmlm <- lm(logq502a_firewood_hoursweek_3season ~ 
                        lpg_2013orbefore + 
                        lpg_20142015 + 
                        lpg_20162017 + 
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen2.74km +
                        logroad_osm.std +
                        factor(nasa$district_anonymous)
                      , data = nasa)

summary(lin.Hansen3kmlm) 
#0.34315

#3 seasons without FE
lin.Hansen3kmlm_fe <- lm(logq502a_firewood_hoursweek_3season ~ 
                           lpg_2013orbefore + 
                           lpg_20142015 + 
                           lpg_20162017 + 
                           logq605_montlyexp.std +
                           q608_bankamount.bi +
                           q201_gender +  
                           q206_caste_otherbackward +
                           q206_caste_scheduledcaste +
                           q206_caste_scheduledtribe +
                           q207_edu_primarysecondary +
                           q207_edu_hs +
                           q207_edu_intermediateabove +
                           q512_firewoodcollectioncompare.no_123 +
                           Hansen2.74km +
                           logroad_osm.std
                         , data = nasa)

summary(lin.Hansen3kmlm_fe) 
#0.23214

#og monsoon
lin.Hansen3kmmonlm <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                           lpg_2013orbefore + 
                           lpg_20142015 + 
                           lpg_20162017 + 
                           logq605_montlyexp.std +
                           q608_bankamount.bi +
                           q201_gender +  
                           q206_caste_otherbackward +
                           q206_caste_scheduledcaste +
                           q206_caste_scheduledtribe +
                           q207_edu_primarysecondary +
                           q207_edu_hs +
                           q207_edu_intermediateabove +
                           q512_firewoodcollectioncompare.no_123 +
                           Hansen2.74km +
                           logroad_osm.std +
                           factor(nasa$district_anonymous)
                         , data = nasa)

summary(lin.Hansen3kmmonlm)
#Multiple R-squared:  0.10169

#monsoon without FE
lin.Hansen3kmmonlm_fe <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                              lpg_2013orbefore + 
                              lpg_20142015 + 
                              lpg_20162017 + 
                              logq605_montlyexp.std +
                              q608_bankamount.bi +
                              q201_gender +  
                              q206_caste_otherbackward +
                              q206_caste_scheduledcaste +
                              q206_caste_scheduledtribe +
                              q207_edu_primarysecondary +
                              q207_edu_hs +
                              q207_edu_intermediateabove +
                              q512_firewoodcollectioncompare.no_123 +
                              Hansen2.74km +
                              logroad_osm.std
                            , data = nasa)

summary(lin.Hansen3kmmonlm_fe)

# 6.5 Table S3 -----
#model results comparing R and AIC of models at buffer distances of tree cover within distance to village

#model 1
#1 km
LPGmodel1km <- glm(q507_lpgcooking ~
                     logq605_montlyexp.std +
                     q608_bankamount.bi +
                     q609_savingamount.bi +  
                     q201_gender +  
                     q206_caste_otherbackward +
                     q206_caste_scheduledcaste +
                     q206_caste_scheduledtribe +
                     q207_edu_primarysecondary +
                     q207_edu_hs +
                     q207_edu_intermediateabove +
                     q512_firewoodcollectioncompare.no_123 +
                     Hansen1km +
                     logroad_osm.std +
                     factor(nasa$district_anonymous), data = nasa, family = "binomial") 

summary(LPGmodel1km) #AIC 6536.12
#2 km
LPGmodel2km <- glm(q507_lpgcooking ~
                     logq605_montlyexp.std +
                     q608_bankamount.bi +
                     q609_savingamount.bi +  
                     q201_gender +  
                     q206_caste_otherbackward +
                     q206_caste_scheduledcaste +
                     q206_caste_scheduledtribe +
                     q207_edu_primarysecondary +
                     q207_edu_hs +
                     q207_edu_intermediateabove +
                     q512_firewoodcollectioncompare.no_123 +
                     Hansen2km +
                     logroad_osm.std +
                     factor(nasa$district_anonymous), data = nasa, family = "binomial") 

summary(LPGmodel2km) #AIC 6536.97
#2.74
LPGmodel2.74km <- glm(q507_lpgcooking ~
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q609_savingamount.bi +  
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen2.74km +
                        logroad_osm.std +
                        factor(nasa$district_anonymous), data = nasa, family = "binomial") 

summary(LPGmodel2.74km) #AIC 6540.34

#3
LPGmodel3km <- glm(q507_lpgcooking ~
                     logq605_montlyexp.std +
                     q608_bankamount.bi +
                     q609_savingamount.bi +  
                     q201_gender +  
                     q206_caste_otherbackward +
                     q206_caste_scheduledcaste +
                     q206_caste_scheduledtribe +
                     q207_edu_primarysecondary +
                     q207_edu_hs +
                     q207_edu_intermediateabove +
                     q512_firewoodcollectioncompare.no_123 +
                     Hansen3km +
                     logroad_osm.std +
                     factor(nasa$district_anonymous), data = nasa, family = "binomial") 

summary(LPGmodel3km) #6541.9
#5 km 
LPGmodel5km <- glm(q507_lpgcooking ~
                     logq605_montlyexp.std +
                     q608_bankamount.bi +
                     q609_savingamount.bi +  
                     q201_gender +  
                     q206_caste_otherbackward +
                     q206_caste_scheduledcaste +
                     q206_caste_scheduledtribe +
                     q207_edu_primarysecondary +
                     q207_edu_hs +
                     q207_edu_intermediateabove +
                     q512_firewoodcollectioncompare.no_123 +
                     Hansen5km +
                     logroad_osm.std +
                     factor(nasa$district_anonymous), data = nasa, family = "binomial") 

summary(LPGmodel5km) #6545.69
#8 km
LPGmodel8km <- glm(q507_lpgcooking ~
                     logq605_montlyexp.std +
                     q608_bankamount.bi +
                     q609_savingamount.bi +  
                     q201_gender +  
                     q206_caste_otherbackward +
                     q206_caste_scheduledcaste +
                     q206_caste_scheduledtribe +
                     q207_edu_primarysecondary +
                     q207_edu_hs +
                     q207_edu_intermediateabove +
                     q512_firewoodcollectioncompare.no_123 +
                     Hansen8km +
                     logroad_osm.std +
                     factor(nasa$district_anonymous), data = nasa, family = "binomial") 

summary(LPGmodel8km) #6552.95
#10 km 
LPGmodel10km <- glm(q507_lpgcooking ~
                      logq605_montlyexp.std +
                      q608_bankamount.bi +
                      q609_savingamount.bi +  
                      q201_gender +  
                      q206_caste_otherbackward +
                      q206_caste_scheduledcaste +
                      q206_caste_scheduledtribe +
                      q207_edu_primarysecondary +
                      q207_edu_hs +
                      q207_edu_intermediateabove +
                      q512_firewoodcollectioncompare.no_123 +
                      Hansen10km +
                      logroad_osm.std +
                      factor(nasa$district_anonymous), data = nasa, family = "binomial") 

summary(LPGmodel10km) #6558.39

# model 2
#1 km 
LPGownmodel1km <- glm(lpg_20162017 ~
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q609_savingamount.bi +  
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen1km +
                        logroad_osm.std +
                        factor(nasalpgown$district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodel1km) #AIC 2149.53
#2 km 
LPGownmodel2km <- glm(lpg_20162017 ~
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q609_savingamount.bi +  
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen2km +
                        logroad_osm.std +
                        factor(nasalpgown$district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodel2km) #2149.04
#2.74
LPGownmodel2.74km <- glm(lpg_20162017 ~
                           logq605_montlyexp.std +
                           q608_bankamount.bi +
                           q609_savingamount.bi +  
                           q201_gender +  
                           q206_caste_otherbackward +
                           q206_caste_scheduledcaste +
                           q206_caste_scheduledtribe +
                           q207_edu_primarysecondary +
                           q207_edu_hs +
                           q207_edu_intermediateabove +
                           q512_firewoodcollectioncompare.no_123 +
                           Hansen2.74km +
                           logroad_osm.std +
                           factor(nasalpgown$district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodel2.74km) #2150.86
#3 km 
LPGownmodel3km <- glm(lpg_20162017 ~
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q609_savingamount.bi +  
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen3km +
                        logroad_osm.std +
                        factor(nasalpgown$district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodel3km)#2151.14
#5 km 
LPGownmodel5km <- glm(lpg_20162017 ~
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q609_savingamount.bi +  
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen5km +
                        logroad_osm.std +
                        factor(nasalpgown$district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodel5km) #2152.09
#8 km 
LPGownmodel8km <- glm(lpg_20162017 ~
                        logq605_montlyexp.std +
                        q608_bankamount.bi +
                        q609_savingamount.bi +  
                        q201_gender +  
                        q206_caste_otherbackward +
                        q206_caste_scheduledcaste +
                        q206_caste_scheduledtribe +
                        q207_edu_primarysecondary +
                        q207_edu_hs +
                        q207_edu_intermediateabove +
                        q512_firewoodcollectioncompare.no_123 +
                        Hansen8km +
                        logroad_osm.std +
                        factor(nasalpgown$district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodel8km) #2156.66
#10 km 
LPGownmodel10km <- glm(lpg_20162017 ~
                         logq605_montlyexp.std +
                         q608_bankamount.bi +
                         q609_savingamount.bi +  
                         q201_gender +  
                         q206_caste_otherbackward +
                         q206_caste_scheduledcaste +
                         q206_caste_scheduledtribe +
                         q207_edu_primarysecondary +
                         q207_edu_hs +
                         q207_edu_intermediateabove +
                         q512_firewoodcollectioncompare.no_123 +
                         Hansen10km +
                         logroad_osm.std +
                         factor(nasalpgown$district_anonymous), data = nasalpgown, family = "binomial") 

summary(LPGownmodel10km) #2163.69

# model 3
#3 seasons
lin.Hansen1km <- lm(logq502a_firewood_hoursweek_3season ~ 
                      lpg_2013orbefore + 
                      lpg_20142015 + 
                      lpg_20162017 + 
                      logq605_montlyexp.std +
                      q608_bankamount.bi +
                      q201_gender +  
                      q206_caste_otherbackward +
                      q206_caste_scheduledcaste +
                      q206_caste_scheduledtribe +
                      q207_edu_primarysecondary +
                      q207_edu_hs +
                      q207_edu_intermediateabove +
                      q512_firewoodcollectioncompare.no_123 +
                      Hansen1km +
                      logroad_osm.std +
                      factor(nasa$district_anonymous)
                    , data = nasa)

summary(lin.Hansen1km) #multiple r squared  0.33752
#2 km 
lin.Hansen2km <- lm(logq502a_firewood_hoursweek_3season ~ 
                      lpg_2013orbefore + 
                      lpg_20142015 + 
                      lpg_20162017 + 
                      logq605_montlyexp.std +
                      q608_bankamount.bi +
                      q201_gender +  
                      q206_caste_otherbackward +
                      q206_caste_scheduledcaste +
                      q206_caste_scheduledtribe +
                      q207_edu_primarysecondary +
                      q207_edu_hs +
                      q207_edu_intermediateabove +
                      q512_firewoodcollectioncompare.no_123 +
                      Hansen2km +
                      logroad_osm.std +
                      factor(nasa$district_anonymous)
                    , data = nasa)

summary(lin.Hansen2km) #0.34137
#2.74 km 
lin.Hansen2.74km <- lm(logq502a_firewood_hoursweek_3season ~ 
                         lpg_2013orbefore + 
                         lpg_20142015 + 
                         lpg_20162017 + 
                         logq605_montlyexp.std +
                         q608_bankamount.bi +
                         q201_gender +  
                         q206_caste_otherbackward +
                         q206_caste_scheduledcaste +
                         q206_caste_scheduledtribe +
                         q207_edu_primarysecondary +
                         q207_edu_hs +
                         q207_edu_intermediateabove +
                         q512_firewoodcollectioncompare.no_123 +
                         Hansen2.74km +
                         logroad_osm.std +
                         factor(nasa$district_anonymous)
                       , data = nasa)

summary(lin.Hansen2.74km) #0.34315
#3 km 
lin.Hansen3km <- lm(logq502a_firewood_hoursweek_3season ~ 
                      lpg_2013orbefore + 
                      lpg_20142015 + 
                      lpg_20162017 + 
                      logq605_montlyexp.std +
                      q608_bankamount.bi +
                      q201_gender +  
                      q206_caste_otherbackward +
                      q206_caste_scheduledcaste +
                      q206_caste_scheduledtribe +
                      q207_edu_primarysecondary +
                      q207_edu_hs +
                      q207_edu_intermediateabove +
                      q512_firewoodcollectioncompare.no_123 +
                      Hansen3km +
                      logroad_osm.std +
                      factor(nasa$district_anonymous)
                    , data = nasa)

summary(lin.Hansen3km) #0.34363

#5 km 
lin.Hansen5km <- lm(logq502a_firewood_hoursweek_3season ~ 
                      lpg_2013orbefore + 
                      lpg_20142015 + 
                      lpg_20162017 + 
                      logq605_montlyexp.std +
                      q608_bankamount.bi +
                      q201_gender +  
                      q206_caste_otherbackward +
                      q206_caste_scheduledcaste +
                      q206_caste_scheduledtribe +
                      q207_edu_primarysecondary +
                      q207_edu_hs +
                      q207_edu_intermediateabove +
                      q512_firewoodcollectioncompare.no_123 +
                      Hansen5km +
                      logroad_osm.std +
                      factor(nasa$district_anonymous)
                    , data = nasa)

summary(lin.Hansen5km) #0.3441
#8km
lin.Hansen8km <- lm(logq502a_firewood_hoursweek_3season ~ 
                      lpg_2013orbefore + 
                      lpg_20142015 + 
                      lpg_20162017 + 
                      logq605_montlyexp.std +
                      q608_bankamount.bi +
                      q201_gender +  
                      q206_caste_otherbackward +
                      q206_caste_scheduledcaste +
                      q206_caste_scheduledtribe +
                      q207_edu_primarysecondary +
                      q207_edu_hs +
                      q207_edu_intermediateabove +
                      q512_firewoodcollectioncompare.no_123 +
                      Hansen8km +
                      logroad_osm.std +
                      factor(nasa$district_anonymous)
                    , data = nasa)

summary(lin.Hansen8km) #0.33425
#10 km
lin.Hansen10km <- lm(logq502a_firewood_hoursweek_3season ~ 
                       lpg_2013orbefore + 
                       lpg_20142015 + 
                       lpg_20162017 + 
                       logq605_montlyexp.std +
                       q608_bankamount.bi +
                       q201_gender +  
                       q206_caste_otherbackward +
                       q206_caste_scheduledcaste +
                       q206_caste_scheduledtribe +
                       q207_edu_primarysecondary +
                       q207_edu_hs +
                       q207_edu_intermediateabove +
                       q512_firewoodcollectioncompare.no_123 +
                       Hansen10km +
                       logroad_osm.std +
                       factor(nasa$district_anonymous)
                     , data = nasa)

summary(lin.Hansen10km) #0.32238

#monsoon
lin.Hansenmon1km <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                         lpg_2013orbefore + 
                         lpg_20142015 + 
                         lpg_20162017 + 
                         logq605_montlyexp.std +
                         q608_bankamount.bi +
                         q201_gender +  
                         q206_caste_otherbackward +
                         q206_caste_scheduledcaste +
                         q206_caste_scheduledtribe +
                         q207_edu_primarysecondary +
                         q207_edu_hs +
                         q207_edu_intermediateabove +
                         q512_firewoodcollectioncompare.no_123 +
                         Hansen1km +
                         logroad_osm.std +
                         factor(nasa$district_anonymous)
                       , data = nasa)

summary(lin.Hansenmon1km) #0.10117
#2 km 
lin.Hansenmon2km <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                         lpg_2013orbefore + 
                         lpg_20142015 + 
                         lpg_20162017 + 
                         logq605_montlyexp.std +
                         q608_bankamount.bi +
                         q201_gender +  
                         q206_caste_otherbackward +
                         q206_caste_scheduledcaste +
                         q206_caste_scheduledtribe +
                         q207_edu_primarysecondary +
                         q207_edu_hs +
                         q207_edu_intermediateabove +
                         q512_firewoodcollectioncompare.no_123 +
                         Hansen2km +
                         logroad_osm.std +
                         factor(nasa$district_anonymous)
                       , data = nasa)

summary(lin.Hansenmon2km) #0.10109
#2.74
lin.Hansenmon2.74km <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                            lpg_2013orbefore + 
                            lpg_20142015 + 
                            lpg_20162017 + 
                            logq605_montlyexp.std +
                            q608_bankamount.bi +
                            q201_gender +  
                            q206_caste_otherbackward +
                            q206_caste_scheduledcaste +
                            q206_caste_scheduledtribe +
                            q207_edu_primarysecondary +
                            q207_edu_hs +
                            q207_edu_intermediateabove +
                            q512_firewoodcollectioncompare.no_123 +
                            Hansen2.74km +
                            logroad_osm.std +
                            factor(nasa$district_anonymous)
                          , data = nasa)

summary(lin.Hansenmon2.74km) # 0.10169
#3 km
lin.Hansenmon3km <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                         lpg_2013orbefore + 
                         lpg_20142015 + 
                         lpg_20162017 + 
                         logq605_montlyexp.std +
                         q608_bankamount.bi +
                         q201_gender +  
                         q206_caste_otherbackward +
                         q206_caste_scheduledcaste +
                         q206_caste_scheduledtribe +
                         q207_edu_primarysecondary +
                         q207_edu_hs +
                         q207_edu_intermediateabove +
                         q512_firewoodcollectioncompare.no_123 +
                         Hansen3km +
                         logroad_osm.std +
                         factor(nasa$district_anonymous)
                       , data = nasa)

summary(lin.Hansenmon3km) #0.10186

#5 km 
lin.Hansenmon5km <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                         lpg_2013orbefore + 
                         lpg_20142015 + 
                         lpg_20162017 + 
                         logq605_montlyexp.std +
                         q608_bankamount.bi +
                         q201_gender +  
                         q206_caste_otherbackward +
                         q206_caste_scheduledcaste +
                         q206_caste_scheduledtribe +
                         q207_edu_primarysecondary +
                         q207_edu_hs +
                         q207_edu_intermediateabove +
                         q512_firewoodcollectioncompare.no_123 +
                         Hansen5km +
                         logroad_osm.std +
                         factor(nasa$district_anonymous)
                       , data = nasa)

summary(lin.Hansenmon5km) #0.10102
#8 km 
lin.Hansenmon8km <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                         lpg_2013orbefore + 
                         lpg_20142015 + 
                         lpg_20162017 + 
                         logq605_montlyexp.std +
                         q608_bankamount.bi +
                         q201_gender +  
                         q206_caste_otherbackward +
                         q206_caste_scheduledcaste +
                         q206_caste_scheduledtribe +
                         q207_edu_primarysecondary +
                         q207_edu_hs +
                         q207_edu_intermediateabove +
                         q512_firewoodcollectioncompare.no_123 +
                         Hansen8km +
                         logroad_osm.std +
                         factor(nasa$district_anonymous)
                       , data = nasa)

summary(lin.Hansenmon8km) #0.097979
#10 km 
lin.Hansenmon10km <- lm(logq502a_monsoon_firewood_hoursweek ~ 
                          lpg_2013orbefore + 
                          lpg_20142015 + 
                          lpg_20162017 + 
                          logq605_montlyexp.std +
                          q608_bankamount.bi +
                          q201_gender +  
                          q206_caste_otherbackward +
                          q206_caste_scheduledcaste +
                          q206_caste_scheduledtribe +
                          q207_edu_primarysecondary +
                          q207_edu_hs +
                          q207_edu_intermediateabove +
                          q512_firewoodcollectioncompare.no_123 +
                          Hansen10km +
                          logroad_osm.std +
                          factor(nasa$district_anonymous)
                        , data = nasa)

summary(lin.Hansenmon10km) #0.095179

# 6.6 Table S2 ------
#Firewood distance all seasons

describe(nasa$q502c_rabi_firewood_dist)
nasadist <- nasa[(nasa$q502c_rabi_firewood_dist>0),] #those that traveled - take out 0
describe(nasadist$q502c_rabi_firewood_dist)
table(nasadist$q502c_rabi_firewood_dist)
#4985
describe(nasadist$q502c_rabi_firewood_dist)
table(nasadist$q502c_rabi_firewood_dist)

describe(nasadist$q502c_summer_firewood_dist)
table(nasadist$q502c_summer_firewood_dist)
nasadist <- nasadist[(nasadist$q502c_summer_firewood_dist>0),]
#4984
describe(nasadist$q502c_summer_firewood_dist)
hist(nasadist$q502c_summer_firewood_dist)

describe(nasadist$q502c_winter_firewood_dist)
table(nasadist$q502c_winter_firewood_dist)

describe(nasadist$q502c_monsoon_firewood_dist)
table(nasadist$q502c_monsoon_firewood_dist)
nasadist <- nasadist[(nasadist$q502c_monsoon_firewood_dist>0),]
#4982
describe(nasadist$q502c_monsoon_firewood_dist)
hist(nasadist$q502c_monsoon_firewood_dist)

nasadist$q501c_firewood_dist_allseason <- (nasadist$q502c_monsoon_firewood_dist + 
                                             nasadist$q502c_winter_firewood_dist + 
                                             nasadist$q502c_summer_firewood_dist + 
                                             nasadist$q502c_rabi_firewood_dist)/4
describe(nasadist$q501c_firewood_dist_allseason)
hist(nasadist$q501c_firewood_dist_allseason)
quantile(nasadist$q501c_firewood_dist_allseason, na.rm=T)

describe(nasa$q502c_rabi_firewood_dist)
nasarabi <- nasa[(nasa$q502c_rabi_firewood_dist>0),]  #4985
describe(nasarabi$q502c_rabi_firewood_dist)
table(nasarabi$q502c_rabi_firewood_dist)
quantile(nasarabi$q502c_rabi_firewood_dist, na.rm=T)

describe(nasa$q502c_summer_firewood_dist)
nasasummer <- nasa[(nasa$q502c_summer_firewood_dist>0),] #4987
describe(nasasummer$q502c_summer_firewood_dist)
table(nasasummer$q502c_summer_firewood_dist)
quantile(nasasummer$q502c_summer_firewood_dist, na.rm=T)

describe(nasa$q502c_winter_firewood_dist)
nasawinter <- nasa[(nasa$q502c_winter_firewood_dist>0),] #4990
describe(nasawinter$q502c_winter_firewood_dist)
table(nasawinter$q502c_winter_firewood_dist)
quantile(nasawinter$q502c_winter_firewood_dist, na.rm=T)

describe(nasa$q502c_monsoon_firewood_dist)
table(nasa$q502c_monsoon_firewood_dist)

nasasmons <- nasa[(nasa$q502c_monsoon_firewood_dist>0),] #4982
describe(nasasmons$q502c_monsoon_firewood_dist)
table(nasasmons$q502c_monsoon_firewood_dist)
quantile(nasasmons$q502c_monsoon_firewood_dist, na.rm=T)

